From d2825fa9365d0101571ed16534b16b7c8d261ab3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 13 Mar 2022 21:11:01 -0600 Subject: [PATCH 001/116] crypto: sm3,sm4 - move into crypto directory The lib/crypto libraries live in lib because they are used by various drivers of the kernel. In contrast, the various helper functions in crypto are there because they're used exclusively by the crypto API. The SM3 and SM4 helper functions were erroniously moved into lib/crypto/ instead of crypto/, even though there are no in-kernel users outside of the crypto API of those functions. This commit moves them into crypto/. Cc: Herbert Xu Cc: Tianjia Zhang Cc: Eric Biggers Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 ++-- crypto/Kconfig | 18 ++++++++++++------ crypto/Makefile | 6 ++++-- {lib/crypto => crypto}/sm3.c | 0 {lib/crypto => crypto}/sm4.c | 0 lib/crypto/Kconfig | 6 ------ lib/crypto/Makefile | 6 ------ 7 files changed, 18 insertions(+), 22 deletions(-) rename {lib/crypto => crypto}/sm3.c (100%) rename {lib/crypto => crypto}/sm4.c (100%) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2a965aa0188d..454621a20eaa 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -45,13 +45,13 @@ config CRYPTO_SM3_ARM64_CE tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_LIB_SM3 + select CRYPTO_SM3 config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" diff --git a/crypto/Kconfig b/crypto/Kconfig index 41068811fd0e..19197469cfab 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -274,7 +274,7 @@ config CRYPTO_ECRDSA config CRYPTO_SM2 tristate "SM2 algorithm" - select CRYPTO_LIB_SM3 + select CRYPTO_SM3 select CRYPTO_AKCIPHER select CRYPTO_MANAGER select MPILIB @@ -1010,9 +1010,12 @@ config CRYPTO_SHA3 http://keccak.noekeon.org/ config CRYPTO_SM3 + tristate + +config CRYPTO_SM3_GENERIC tristate "SM3 digest algorithm" select CRYPTO_HASH - select CRYPTO_LIB_SM3 + select CRYPTO_SM3 help SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3). It is part of the Chinese Commercial Cryptography suite. @@ -1025,7 +1028,7 @@ config CRYPTO_SM3_AVX_X86_64 tristate "SM3 digest algorithm (x86_64/AVX)" depends on X86 && 64BIT select CRYPTO_HASH - select CRYPTO_LIB_SM3 + select CRYPTO_SM3 help SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3). It is part of the Chinese Commercial Cryptography suite. This is @@ -1572,9 +1575,12 @@ config CRYPTO_SERPENT_AVX2_X86_64 config CRYPTO_SM4 + tristate + +config CRYPTO_SM4_GENERIC tristate "SM4 cipher algorithm" select CRYPTO_ALGAPI - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 help SM4 cipher algorithms (OSCCA GB/T 32907-2016). @@ -1603,7 +1609,7 @@ config CRYPTO_SM4_AESNI_AVX_X86_64 select CRYPTO_SKCIPHER select CRYPTO_SIMD select CRYPTO_ALGAPI - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 help SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX). @@ -1624,7 +1630,7 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64 select CRYPTO_SKCIPHER select CRYPTO_SIMD select CRYPTO_ALGAPI - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 select CRYPTO_SM4_AESNI_AVX_X86_64 help SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2). diff --git a/crypto/Makefile b/crypto/Makefile index f754c4d17d6b..43bc33e247d1 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -78,7 +78,8 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o -obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o +obj-$(CONFIG_CRYPTO_SM3) += sm3.o +obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 @@ -134,7 +135,8 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356 -obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o +obj-$(CONFIG_CRYPTO_SM4) += sm4.o +obj-$(CONFIG_CRYPTO_SM4_GENERIC) += sm4_generic.o obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o diff --git a/lib/crypto/sm3.c b/crypto/sm3.c similarity index 100% rename from lib/crypto/sm3.c rename to crypto/sm3.c diff --git a/lib/crypto/sm4.c b/crypto/sm4.c similarity index 100% rename from lib/crypto/sm4.c rename to crypto/sm4.c diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 379a66d7f504..9856e291f414 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -123,10 +123,4 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA256 tristate -config CRYPTO_LIB_SM3 - tristate - -config CRYPTO_LIB_SM4 - tristate - endmenu diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 6c872d05d1e6..26be2bbe09c5 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -37,12 +37,6 @@ libpoly1305-y += poly1305.o obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o libsha256-y := sha256.o -obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o -libsm3-y := sm3.o - -obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o -libsm4-y := sm4.o - ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) libblake2s-y += blake2s-selftest.o libchacha20poly1305-y += chacha20poly1305-selftest.o From 73c919d314ad57be900437fd329990b1d846b763 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:51 +0800 Subject: [PATCH 002/116] crypto: sm4 - export sm4 constant arrays Export the constant arrays fk, ck, sbox of the SM4 algorithm, and add the 'crypto_sm4_' prefix, where sbox is used in the SM4 NEON implementation for the tbl/tbx instruction to replace the S-BOX, and the fk, ck arrays are used in the SM4 CE implementation. Use the sm4ekey instruction to speed up key expansion operations. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- crypto/sm4.c | 10 +++++++++- include/crypto/sm4.h | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crypto/sm4.c b/crypto/sm4.c index 284e62576d0c..2c44193bc27e 100644 --- a/crypto/sm4.c +++ b/crypto/sm4.c @@ -11,7 +11,7 @@ #include #include -static const u32 fk[4] = { +static const u32 ____cacheline_aligned fk[4] = { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc }; @@ -61,6 +61,14 @@ static const u8 ____cacheline_aligned sbox[256] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 }; +extern const u32 crypto_sm4_fk[4] __alias(fk); +extern const u32 crypto_sm4_ck[32] __alias(ck); +extern const u8 crypto_sm4_sbox[256] __alias(sbox); + +EXPORT_SYMBOL(crypto_sm4_fk); +EXPORT_SYMBOL(crypto_sm4_ck); +EXPORT_SYMBOL(crypto_sm4_sbox); + static inline u32 sm4_t_non_lin_sub(u32 x) { u32 out; diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 709f286e7b25..9656a9a40326 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -21,6 +21,10 @@ struct sm4_ctx { u32 rkey_dec[SM4_RKEY_WORDS]; }; +extern const u32 crypto_sm4_fk[]; +extern const u32 crypto_sm4_ck[]; +extern const u8 crypto_sm4_sbox[]; + /** * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 * @ctx: The location where the computed key will be stored. From 02436762f5ff4b3f662bc196c70a563bcbc92b7d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:52 +0800 Subject: [PATCH 003/116] crypto: arm64/sm4-ce - rename to sm4-ce-cipher The subsequent patches of the series will have an implementation of SM4-ECB/CBC/CFB/CTR accelerated by the CE instruction set, which conflicts with the current module name. In order to keep the naming rules of the AES algorithm consistent, the sm4-ce algorithm is renamed to sm4-ce-cipher. In addition, the speed of sm4-ce-cipher is better than that of SM4 NEON. By the way, the priority of the algorithm is adjusted to 300, which is also to leave room for the priority of SM4 NEON. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Makefile | 4 ++-- arch/arm64/crypto/{sm4-ce-core.S => sm4-ce-cipher-core.S} | 0 arch/arm64/crypto/{sm4-ce-glue.c => sm4-ce-cipher-glue.c} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename arch/arm64/crypto/{sm4-ce-core.S => sm4-ce-cipher-core.S} (100%) rename arch/arm64/crypto/{sm4-ce-glue.c => sm4-ce-cipher-glue.c} (98%) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 09a805cc32d7..85863e610a2e 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -20,8 +20,8 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o -obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o -sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o +sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-cipher-core.S similarity index 100% rename from arch/arm64/crypto/sm4-ce-core.S rename to arch/arm64/crypto/sm4-ce-cipher-core.S diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c similarity index 98% rename from arch/arm64/crypto/sm4-ce-glue.c rename to arch/arm64/crypto/sm4-ce-cipher-glue.c index 9c93cfc4841b..76a34ef4abbb 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -54,7 +54,7 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static struct crypto_alg sm4_ce_alg = { .cra_name = "sm4", .cra_driver_name = "sm4-ce", - .cra_priority = 200, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), From 4f1aef9b806f58ef76fdac0b4d9cfab6e66aeef1 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:53 +0800 Subject: [PATCH 004/116] crypto: arm64/sm4 - add ARMv8 NEON implementation This adds ARMv8 NEON implementations of SM4 in ECB, CBC, CFB and CTR modes. This implementation uses the plain NEON instruction set, All S-BOX substitutions uses the tbl/tbx instructions of ARMv8, combined with the out-of-order execution in CPU, this optimization supports encryption of up to 8 blocks at the same time. The performance of encrypting one block is not as good as software implementation, so the encryption operations of CBC and CFB still use pure software algorithms. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-neon-core.S | 487 ++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-neon-glue.c | 442 +++++++++++++++++++++++++++ 4 files changed, 938 insertions(+) create mode 100644 arch/arm64/crypto/sm4-neon-core.S create mode 100644 arch/arm64/crypto/sm4-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 454621a20eaa..d62dd54d1800 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -53,6 +53,12 @@ config CRYPTO_SM4_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_SM4 +config CRYPTO_SM4_ARM64_NEON_BLK + tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" + depends on KERNEL_MODE_NEON + select CRYPTO_SKCIPHER + select CRYPTO_LIB_SM4 + config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 85863e610a2e..41aee6103e78 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o +sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o + obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S new file mode 100644 index 000000000000..3d5256b354d2 --- /dev/null +++ b/arch/arm64/crypto/sm4-neon-core.S @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm for ARMv8 NEON + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include + +/* Register macros */ + +#define RTMP0 v8 +#define RTMP1 v9 +#define RTMP2 v10 +#define RTMP3 v11 + +#define RX0 v12 +#define RX1 v13 +#define RKEY v14 +#define RIV v15 + +/* Helper macros. */ + +#define PREPARE \ + adr_l x5, crypto_sm4_sbox; \ + ld1 {v16.16b-v19.16b}, [x5], #64; \ + ld1 {v20.16b-v23.16b}, [x5], #64; \ + ld1 {v24.16b-v27.16b}, [x5], #64; \ + ld1 {v28.16b-v31.16b}, [x5]; + +#define transpose_4x4(s0, s1, s2, s3) \ + zip1 RTMP0.4s, s0.4s, s1.4s; \ + zip1 RTMP1.4s, s2.4s, s3.4s; \ + zip2 RTMP2.4s, s0.4s, s1.4s; \ + zip2 RTMP3.4s, s2.4s, s3.4s; \ + zip1 s0.2d, RTMP0.2d, RTMP1.2d; \ + zip2 s1.2d, RTMP0.2d, RTMP1.2d; \ + zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ + zip2 s3.2d, RTMP2.2d, RTMP3.2d; + +#define rotate_clockwise_90(s0, s1, s2, s3) \ + zip1 RTMP0.4s, s1.4s, s0.4s; \ + zip2 RTMP1.4s, s1.4s, s0.4s; \ + zip1 RTMP2.4s, s3.4s, s2.4s; \ + zip2 RTMP3.4s, s3.4s, s2.4s; \ + zip1 s0.2d, RTMP2.2d, RTMP0.2d; \ + zip2 s1.2d, RTMP2.2d, RTMP0.2d; \ + zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ + zip2 s3.2d, RTMP3.2d, RTMP1.2d; + +#define ROUND4(round, s0, s1, s2, s3) \ + dup RX0.4s, RKEY.s[round]; \ + /* rk ^ s1 ^ s2 ^ s3 */ \ + eor RTMP1.16b, s2.16b, s3.16b; \ + eor RX0.16b, RX0.16b, s1.16b; \ + eor RX0.16b, RX0.16b, RTMP1.16b; \ + \ + /* sbox, non-linear part */ \ + movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \ + tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \ + \ + /* linear part */ \ + shl RTMP1.4s, RTMP0.4s, #8; \ + shl RTMP2.4s, RTMP0.4s, #16; \ + shl RTMP3.4s, RTMP0.4s, #24; \ + sri RTMP1.4s, RTMP0.4s, #(32-8); \ + sri RTMP2.4s, RTMP0.4s, #(32-16); \ + sri RTMP3.4s, RTMP0.4s, #(32-24); \ + /* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */ \ + eor RTMP1.16b, RTMP1.16b, RTMP0.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP2.16b; \ + /* RTMP3 = x ^ rol32(x, 24) ^ rol32(RTMP1, 2) */ \ + eor RTMP3.16b, RTMP3.16b, RTMP0.16b; \ + shl RTMP2.4s, RTMP1.4s, 2; \ + sri RTMP2.4s, RTMP1.4s, #(32-2); \ + eor RTMP3.16b, RTMP3.16b, RTMP2.16b; \ + /* s0 ^= RTMP3 */ \ + eor s0.16b, s0.16b, RTMP3.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + \ + transpose_4x4(b0, b1, b2, b3); \ + \ + mov x6, 8; \ +4: \ + ld1 {RKEY.4s}, [x0], #16; \ + subs x6, x6, #1; \ + \ + ROUND4(0, b0, b1, b2, b3); \ + ROUND4(1, b1, b2, b3, b0); \ + ROUND4(2, b2, b3, b0, b1); \ + ROUND4(3, b3, b0, b1, b2); \ + \ + bne 4b; \ + \ + rotate_clockwise_90(b0, b1, b2, b3); \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + \ + /* repoint to rkey */ \ + sub x0, x0, #128; + +#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \ + /* rk ^ s1 ^ s2 ^ s3 */ \ + dup RX0.4s, RKEY.s[round]; \ + eor RTMP0.16b, s2.16b, s3.16b; \ + mov RX1.16b, RX0.16b; \ + eor RTMP1.16b, t2.16b, t3.16b; \ + eor RX0.16b, RX0.16b, s1.16b; \ + eor RX1.16b, RX1.16b, t1.16b; \ + eor RX0.16b, RX0.16b, RTMP0.16b; \ + eor RX1.16b, RX1.16b, RTMP1.16b; \ + \ + /* sbox, non-linear part */ \ + movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \ + tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \ + tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \ + tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \ + tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \ + sub RX0.16b, RX0.16b, RTMP3.16b; \ + sub RX1.16b, RX1.16b, RTMP3.16b; \ + tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \ + tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \ + \ + /* linear part */ \ + shl RX0.4s, RTMP0.4s, #8; \ + shl RX1.4s, RTMP1.4s, #8; \ + shl RTMP2.4s, RTMP0.4s, #16; \ + shl RTMP3.4s, RTMP1.4s, #16; \ + sri RX0.4s, RTMP0.4s, #(32 - 8); \ + sri RX1.4s, RTMP1.4s, #(32 - 8); \ + sri RTMP2.4s, RTMP0.4s, #(32 - 16); \ + sri RTMP3.4s, RTMP1.4s, #(32 - 16); \ + /* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */ \ + eor RX0.16b, RX0.16b, RTMP0.16b; \ + eor RX1.16b, RX1.16b, RTMP1.16b; \ + eor RX0.16b, RX0.16b, RTMP2.16b; \ + eor RX1.16b, RX1.16b, RTMP3.16b; \ + /* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */ \ + shl RTMP2.4s, RTMP0.4s, #24; \ + shl RTMP3.4s, RTMP1.4s, #24; \ + sri RTMP2.4s, RTMP0.4s, #(32 - 24); \ + sri RTMP3.4s, RTMP1.4s, #(32 - 24); \ + eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \ + shl RTMP2.4s, RX0.4s, #2; \ + shl RTMP3.4s, RX1.4s, #2; \ + sri RTMP2.4s, RX0.4s, #(32 - 2); \ + sri RTMP3.4s, RX1.4s, #(32 - 2); \ + eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \ + eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \ + /* s0/t0 ^= RTMP0/1 */ \ + eor s0.16b, s0.16b, RTMP0.16b; \ + eor t0.16b, t0.16b, RTMP1.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + \ + transpose_4x4(b0, b1, b2, b3); \ + transpose_4x4(b4, b5, b6, b7); \ + \ + mov x6, 8; \ +8: \ + ld1 {RKEY.4s}, [x0], #16; \ + subs x6, x6, #1; \ + \ + ROUND8(0, b0, b1, b2, b3, b4, b5, b6, b7); \ + ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4); \ + ROUND8(2, b2, b3, b0, b1, b6, b7, b4, b5); \ + ROUND8(3, b3, b0, b1, b2, b7, b4, b5, b6); \ + \ + bne 8b; \ + \ + rotate_clockwise_90(b0, b1, b2, b3); \ + rotate_clockwise_90(b4, b5, b6, b7); \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + \ + /* repoint to rkey */ \ + sub x0, x0, #128; + + +.align 3 +SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: num blocks (1..4) + */ + PREPARE; + + ld1 {v0.16b}, [x2], #16; + mov v1.16b, v0.16b; + mov v2.16b, v0.16b; + mov v3.16b, v0.16b; + cmp w3, #2; + blt .Lblk4_load_input_done; + ld1 {v1.16b}, [x2], #16; + beq .Lblk4_load_input_done; + ld1 {v2.16b}, [x2], #16; + cmp w3, #3; + beq .Lblk4_load_input_done; + ld1 {v3.16b}, [x2]; + +.Lblk4_load_input_done: + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + st1 {v0.16b}, [x1], #16; + cmp w3, #2; + blt .Lblk4_store_output_done; + st1 {v1.16b}, [x1], #16; + beq .Lblk4_store_output_done; + st1 {v2.16b}, [x1], #16; + cmp w3, #3; + beq .Lblk4_store_output_done; + st1 {v3.16b}, [x1]; + +.Lblk4_store_output_done: + ret; +SYM_FUNC_END(__sm4_neon_crypt_blk1_4) + +.align 3 +SYM_FUNC_START(sm4_neon_crypt_blk1_8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: num blocks (1..8) + */ + cmp w3, #5; + blt __sm4_neon_crypt_blk1_4; + + PREPARE; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b}, [x2], #16; + mov v5.16b, v4.16b; + mov v6.16b, v4.16b; + mov v7.16b, v4.16b; + beq .Lblk8_load_input_done; + ld1 {v5.16b}, [x2], #16; + cmp w3, #7; + blt .Lblk8_load_input_done; + ld1 {v6.16b}, [x2], #16; + beq .Lblk8_load_input_done; + ld1 {v7.16b}, [x2]; + +.Lblk8_load_input_done: + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + cmp w3, #6; + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b}, [x1], #16; + blt .Lblk8_store_output_done; + st1 {v5.16b}, [x1], #16; + beq .Lblk8_store_output_done; + st1 {v6.16b}, [x1], #16; + cmp w3, #7; + beq .Lblk8_store_output_done; + st1 {v7.16b}, [x1]; + +.Lblk8_store_output_done: + ret; +SYM_FUNC_END(sm4_neon_crypt_blk1_8) + +.align 3 +SYM_FUNC_START(sm4_neon_crypt_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: nblocks (multiples of 8) + */ + PREPARE; + +.Lcrypt_loop_blk: + subs w3, w3, #8; + bmi .Lcrypt_end; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2], #64; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lcrypt_loop_blk; + +.Lcrypt_end: + ret; +SYM_FUNC_END(sm4_neon_crypt_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_cbc_dec_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_loop_blk: + subs w4, w4, #8; + bmi .Lcbc_end; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #64; + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + eor v4.16b, v4.16b, RTMP3.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v5.16b, v5.16b, RTMP0.16b; + eor v6.16b, v6.16b, RTMP1.16b; + eor v7.16b, v7.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lcbc_loop_blk; + +.Lcbc_end: + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_neon_cbc_dec_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_cfb_dec_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ld1 {v0.16b}, [x3]; + +.Lcfb_loop_blk: + subs w4, w4, #8; + bmi .Lcfb_end; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #48; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + b .Lcfb_loop_blk; + +.Lcfb_end: + /* store new IV */ + st1 {v0.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_neon_cfb_dec_blk8) + +.align 3 +SYM_FUNC_START(sm4_neon_ctr_enc_blk8) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nblocks (multiples of 8) + */ + PREPARE; + + ldp x7, x8, [x3]; + rev x7, x7; + rev x8, x8; + +.Lctr_loop_blk: + subs w4, w4, #8; + bmi .Lctr_end; + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + adc x7, x7, xzr; \ + rev64 vctr.16b, vctr.16b; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + inc_le128(v4); /* +4 */ + inc_le128(v5); /* +5 */ + inc_le128(v6); /* +6 */ + inc_le128(v7); /* +7 */ + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + b .Lctr_loop_blk; + +.Lctr_end: + /* store new CTR */ + rev x7, x7; + rev x8, x8; + stp x7, x8, [x3]; + + ret; +SYM_FUNC_END(sm4_neon_ctr_enc_blk8) diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c new file mode 100644 index 000000000000..03a6a6866a31 --- /dev/null +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, using ARMv8 NEON + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BYTES2BLKS(nbytes) ((nbytes) >> 4) +#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1)) + +asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); + +static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_expandkey(ctx, key, key_len); +} + +static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_crypt_blk8(rkey, dst, src, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_neon_crypt_blk1_8(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_enc); +} + +static int sm4_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_dec); +} + +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); + sm4_crypt_block(ctx->rkey_enc, dst, dst); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + u8 iv[SM4_BLOCK_SIZE]; + int i; + + sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream, + src, nblks); + + src += ((int)nblks - 2) * SM4_BLOCK_SIZE; + dst += (nblks - 1) * SM4_BLOCK_SIZE; + memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); + + for (i = nblks - 1; i > 0; i--) { + crypto_xor_cpy(dst, src, + &keystream[i * SM4_BLOCK_SIZE], + SM4_BLOCK_SIZE); + src -= SM4_BLOCK_SIZE; + dst -= SM4_BLOCK_SIZE; + } + crypto_xor_cpy(dst, walk.iv, + keystream, SM4_BLOCK_SIZE); + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + const u8 *iv = walk.iv; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= SM4_BLOCK_SIZE) { + sm4_crypt_block(ctx->rkey_enc, keystream, iv); + crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE); + iv = dst; + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + nbytes -= SM4_BLOCK_SIZE; + } + if (iv != walk.iv) + memcpy(walk.iv, iv, SM4_BLOCK_SIZE); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + + memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); + if (nblks > 1) + memcpy(&keystream[SM4_BLOCK_SIZE], src, + (nblks - 1) * SM4_BLOCK_SIZE); + memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + + sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, + keystream, nblks); + + crypto_xor_cpy(dst, src, keystream, + nblks * SM4_BLOCK_SIZE); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLK8(nbytes); + if (nblks) { + sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src, + walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + u8 keystream[SM4_BLOCK_SIZE * 8]; + int i; + + for (i = 0; i < nblks; i++) { + memcpy(&keystream[i * SM4_BLOCK_SIZE], + walk.iv, SM4_BLOCK_SIZE); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + } + sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, + keystream, nblks); + + crypto_xor_cpy(dst, src, keystream, + nblks * SM4_BLOCK_SIZE); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg sm4_algs[] = { + { + .base = { + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-neon", + .cra_priority = 200, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ecb_encrypt, + .decrypt = sm4_ecb_decrypt, + }, { + .base = { + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-neon", + .cra_priority = 200, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = sm4_cbc_decrypt, + }, { + .base = { + .cra_name = "cfb(sm4)", + .cra_driver_name = "cfb-sm4-neon", + .cra_priority = 200, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = sm4_cfb_decrypt, + }, { + .base = { + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-neon", + .cra_priority = 200, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ctr_crypt, + .decrypt = sm4_ctr_crypt, + } +}; + +static int __init sm4_init(void) +{ + return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +static void __exit sm4_exit(void) +{ + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +module_init(sm4_init); +module_exit(sm4_exit); + +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 NEON"); +MODULE_ALIAS_CRYPTO("sm4-neon"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("ecb(sm4)"); +MODULE_ALIAS_CRYPTO("cbc(sm4)"); +MODULE_ALIAS_CRYPTO("cfb(sm4)"); +MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); From 5b33e0ec881c609d96c9cce63fe15e0d0af457db Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 15 Mar 2022 17:44:54 +0800 Subject: [PATCH 005/116] crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR modes using Crypto Extensions, also includes key expansion operations because the Crypto Extensions instruction is much faster than software implementations. The Crypto Extensions for SM4 can only run on ARMv8 implementations that have support for these optional extensions. Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of tcrypt. The abscissas are blocks of different lengths. The data is tabulated and the unit is Mb/s: sm4-generic | 16 64 128 256 1024 1420 4096 ECB enc | 80.05 91.42 93.66 94.77 95.69 95.77 95.86 ECB dec | 79.98 91.41 93.64 94.76 95.66 95.77 95.85 CBC enc | 78.55 86.50 88.02 88.77 89.36 89.42 89.48 CBC dec | 76.82 89.06 91.52 92.77 93.75 93.83 93.96 CFB enc | 77.64 86.13 87.62 88.42 89.08 88.83 89.18 CFB dec | 77.57 88.34 90.36 91.45 92.34 92.00 92.44 CTR enc | 77.80 88.28 90.23 91.22 92.11 91.81 92.25 CTR dec | 77.83 88.22 90.22 91.22 92.04 91.82 92.28 sm4-neon ECB enc | 28.31 112.77 203.03 209.89 215.49 202.11 210.59 ECB dec | 28.36 113.45 203.23 210.00 215.52 202.13 210.65 CBC enc | 79.32 87.02 88.51 89.28 89.85 89.89 89.97 CBC dec | 28.29 112.20 203.30 209.82 214.99 201.51 209.95 CFB enc | 79.59 87.16 88.54 89.30 89.83 89.62 89.92 CFB dec | 28.12 111.05 202.47 209.02 214.21 210.90 209.12 CTR enc | 28.04 108.81 200.62 206.65 211.78 208.78 206.74 CTR dec | 28.02 108.82 200.45 206.62 211.78 208.74 206.70 sm4-ce-cipher ECB enc | 336.79 587.13 682.70 747.37 803.75 811.52 818.06 ECB dec | 339.18 584.52 679.72 743.68 798.82 803.83 811.54 CBC enc | 316.63 521.47 597.00 647.14 690.82 695.21 700.55 CBC dec | 291.80 503.79 585.66 640.82 689.86 695.16 701.72 CFB enc | 294.79 482.31 552.13 594.71 631.60 628.91 638.92 CFB dec | 293.09 466.44 526.56 563.17 594.41 592.26 601.97 CTR enc | 309.61 506.13 576.86 620.47 656.38 654.51 665.10 CTR dec | 306.69 505.57 576.84 620.18 657.09 654.52 665.32 sm4-ce ECB enc | 366.96 1329.81 2024.29 2755.50 3790.07 3861.91 4051.40 ECB dec | 367.30 1323.93 2018.72 2747.43 3787.39 3862.55 4052.62 CBC enc | 358.09 682.68 807.24 885.35 958.29 963.60 973.73 CBC dec | 366.51 1303.63 1978.64 2667.93 3624.53 3683.41 3856.08 CFB enc | 351.51 681.26 807.81 893.10 968.54 969.17 985.83 CFB dec | 354.98 1266.61 1929.63 2634.81 3614.23 3611.59 3841.68 CTR enc | 324.23 1121.25 1689.44 2256.70 2981.90 3007.79 3060.74 CTR dec | 324.18 1120.44 1694.31 2258.32 2982.01 3010.09 3060.99 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sm4-ce-core.S | 660 ++++++++++++++++++++++++++++++++ arch/arm64/crypto/sm4-ce-glue.c | 372 ++++++++++++++++++ 4 files changed, 1041 insertions(+) create mode 100644 arch/arm64/crypto/sm4-ce-core.S create mode 100644 arch/arm64/crypto/sm4-ce-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index d62dd54d1800..4fe7037d2347 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -53,6 +53,12 @@ config CRYPTO_SM4_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_SM4 +config CRYPTO_SM4_ARM64_CE_BLK + tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_SKCIPHER + select CRYPTO_LIB_SM4 + config CRYPTO_SM4_ARM64_NEON_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 41aee6103e78..bea8995133b1 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o +sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o + obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S new file mode 100644 index 000000000000..934e0f093279 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -0,0 +1,660 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm for ARMv8 with Crypto Extensions + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include + +.arch armv8-a+crypto + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +.macro sm4ekey, vd, vn, vm + .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +#define RTMP0 v16 +#define RTMP1 v17 +#define RTMP2 v18 +#define RTMP3 v19 + +#define RIV v20 + +/* Helper macros. */ + +#define PREPARE \ + ld1 {v24.16b-v27.16b}, [x0], #64; \ + ld1 {v28.16b-v31.16b}, [x0]; + +#define SM4_CRYPT_BLK(b0) \ + rev32 b0.16b, b0.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b4.4s, v24.4s; \ + sm4e b5.4s, v24.4s; \ + sm4e b6.4s, v24.4s; \ + sm4e b7.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b4.4s, v25.4s; \ + sm4e b5.4s, v25.4s; \ + sm4e b6.4s, v25.4s; \ + sm4e b7.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b4.4s, v26.4s; \ + sm4e b5.4s, v26.4s; \ + sm4e b6.4s, v26.4s; \ + sm4e b7.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b4.4s, v27.4s; \ + sm4e b5.4s, v27.4s; \ + sm4e b6.4s, v27.4s; \ + sm4e b7.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b4.4s, v28.4s; \ + sm4e b5.4s, v28.4s; \ + sm4e b6.4s, v28.4s; \ + sm4e b7.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b4.4s, v29.4s; \ + sm4e b5.4s, v29.4s; \ + sm4e b6.4s, v29.4s; \ + sm4e b7.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b4.4s, v30.4s; \ + sm4e b5.4s, v30.4s; \ + sm4e b6.4s, v30.4s; \ + sm4e b7.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + sm4e b4.4s, v31.4s; \ + sm4e b5.4s, v31.4s; \ + sm4e b6.4s, v31.4s; \ + sm4e b7.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + rev64 b4.4s, b4.4s; \ + rev64 b5.4s, b5.4s; \ + rev64 b6.4s, b6.4s; \ + rev64 b7.4s, b7.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + ext b4.16b, b4.16b, b4.16b, #8; \ + ext b5.16b, b5.16b, b5.16b, #8; \ + ext b6.16b, b6.16b, b6.16b, #8; \ + ext b7.16b, b7.16b, b7.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; + + +.align 3 +SYM_FUNC_START(sm4_ce_expand_key) + /* input: + * x0: 128-bit key + * x1: rkey_enc + * x2: rkey_dec + * x3: fk array + * x4: ck array + */ + ld1 {v0.16b}, [x0]; + rev32 v0.16b, v0.16b; + ld1 {v1.16b}, [x3]; + /* load ck */ + ld1 {v24.16b-v27.16b}, [x4], #64; + ld1 {v28.16b-v31.16b}, [x4]; + + /* input ^ fk */ + eor v0.16b, v0.16b, v1.16b; + + sm4ekey v0.4s, v0.4s, v24.4s; + sm4ekey v1.4s, v0.4s, v25.4s; + sm4ekey v2.4s, v1.4s, v26.4s; + sm4ekey v3.4s, v2.4s, v27.4s; + sm4ekey v4.4s, v3.4s, v28.4s; + sm4ekey v5.4s, v4.4s, v29.4s; + sm4ekey v6.4s, v5.4s, v30.4s; + sm4ekey v7.4s, v6.4s, v31.4s; + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1]; + rev64 v7.4s, v7.4s; + rev64 v6.4s, v6.4s; + rev64 v5.4s, v5.4s; + rev64 v4.4s, v4.4s; + rev64 v3.4s, v3.4s; + rev64 v2.4s, v2.4s; + rev64 v1.4s, v1.4s; + rev64 v0.4s, v0.4s; + ext v7.16b, v7.16b, v7.16b, #8; + ext v6.16b, v6.16b, v6.16b, #8; + ext v5.16b, v5.16b, v5.16b, #8; + ext v4.16b, v4.16b, v4.16b, #8; + ext v3.16b, v3.16b, v3.16b, #8; + ext v2.16b, v2.16b, v2.16b, #8; + ext v1.16b, v1.16b, v1.16b, #8; + ext v0.16b, v0.16b, v0.16b, #8; + st1 {v7.16b}, [x2], #16; + st1 {v6.16b}, [x2], #16; + st1 {v5.16b}, [x2], #16; + st1 {v4.16b}, [x2], #16; + st1 {v3.16b}, [x2], #16; + st1 {v2.16b}, [x2], #16; + st1 {v1.16b}, [x2], #16; + st1 {v0.16b}, [x2]; + + ret; +SYM_FUNC_END(sm4_ce_expand_key) + +.align 3 +SYM_FUNC_START(sm4_ce_crypt_block) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + */ + PREPARE; + + ld1 {v0.16b}, [x2]; + SM4_CRYPT_BLK(v0); + st1 {v0.16b}, [x1]; + + ret; +SYM_FUNC_END(sm4_ce_crypt_block) + +.align 3 +SYM_FUNC_START(sm4_ce_crypt) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * w3: nblocks + */ + PREPARE; + +.Lcrypt_loop_blk: + sub w3, w3, #8; + tbnz w3, #31, .Lcrypt_tail8; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2], #64; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + st1 {v0.16b-v3.16b}, [x1], #64; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w3, .Lcrypt_end; + b .Lcrypt_loop_blk; + +.Lcrypt_tail8: + add w3, w3, #8; + cmp w3, #4; + blt .Lcrypt_tail4; + + sub w3, w3, #4; + + ld1 {v0.16b-v3.16b}, [x2], #64; + SM4_CRYPT_BLK4(v0, v1, v2, v3); + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w3, .Lcrypt_end; + +.Lcrypt_tail4: + sub w3, w3, #1; + + ld1 {v0.16b}, [x2], #16; + SM4_CRYPT_BLK(v0); + st1 {v0.16b}, [x1], #16; + + cbnz w3, .Lcrypt_tail4; + +.Lcrypt_end: + ret; +SYM_FUNC_END(sm4_ce_crypt) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_enc_loop: + sub w4, w4, #1; + + ld1 {RTMP0.16b}, [x2], #16; + eor RIV.16b, RIV.16b, RTMP0.16b; + + SM4_CRYPT_BLK(RIV); + + st1 {RIV.16b}, [x1], #16; + + cbnz w4, .Lcbc_enc_loop; + + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cbc_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcbc_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lcbc_tail8; + + ld1 {v0.16b-v3.16b}, [x2], #64; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #64; + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + eor v4.16b, v4.16b, RTMP3.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v5.16b, v5.16b, RTMP0.16b; + eor v6.16b, v6.16b, RTMP1.16b; + eor v7.16b, v7.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w4, .Lcbc_end; + b .Lcbc_loop_blk; + +.Lcbc_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lcbc_tail4; + + sub w4, w4, #4; + + ld1 {v0.16b-v3.16b}, [x2]; + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + eor v0.16b, v0.16b, RIV.16b; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v1.16b, v1.16b, RTMP0.16b; + eor v2.16b, v2.16b, RTMP1.16b; + eor v3.16b, v3.16b, RTMP2.16b; + + mov RIV.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w4, .Lcbc_end; + +.Lcbc_tail4: + sub w4, w4, #1; + + ld1 {v0.16b}, [x2]; + + SM4_CRYPT_BLK(v0); + + eor v0.16b, v0.16b, RIV.16b; + ld1 {RIV.16b}, [x2], #16; + st1 {v0.16b}, [x1], #16; + + cbnz w4, .Lcbc_tail4; + +.Lcbc_end: + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cbc_dec) + +.align 3 +SYM_FUNC_START(sm4_ce_cfb_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {RIV.16b}, [x3]; + +.Lcfb_enc_loop: + sub w4, w4, #1; + + SM4_CRYPT_BLK(RIV); + + ld1 {RTMP0.16b}, [x2], #16; + eor RIV.16b, RIV.16b, RTMP0.16b; + st1 {RIV.16b}, [x1], #16; + + cbnz w4, .Lcfb_enc_loop; + + /* store new IV */ + st1 {RIV.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cfb_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cfb_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ld1 {v0.16b}, [x3]; + +.Lcfb_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lcfb_tail8; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; + ld1 {v4.16b-v7.16b}, [x2]; + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + sub x2, x2, #48; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + cbz w4, .Lcfb_end; + b .Lcfb_loop_blk; + +.Lcfb_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lcfb_tail4; + + sub w4, w4, #4; + + ld1 {v1.16b, v2.16b, v3.16b}, [x2]; + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + mov v0.16b, RTMP3.16b; + + cbz w4, .Lcfb_end; + +.Lcfb_tail4: + sub w4, w4, #1; + + SM4_CRYPT_BLK(v0); + + ld1 {RTMP0.16b}, [x2], #16; + eor v0.16b, v0.16b, RTMP0.16b; + st1 {v0.16b}, [x1], #16; + + mov v0.16b, RTMP0.16b; + + cbnz w4, .Lcfb_tail4; + +.Lcfb_end: + /* store new IV */ + st1 {v0.16b}, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_cfb_dec) + +.align 3 +SYM_FUNC_START(sm4_ce_ctr_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nblocks + */ + PREPARE; + + ldp x7, x8, [x3]; + rev x7, x7; + rev x8, x8; + +.Lctr_loop_blk: + sub w4, w4, #8; + tbnz w4, #31, .Lctr_tail8; + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + adc x7, x7, xzr; \ + rev64 vctr.16b, vctr.16b; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + inc_le128(v4); /* +4 */ + inc_le128(v5); /* +5 */ + inc_le128(v6); /* +6 */ + inc_le128(v7); /* +7 */ + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v4.16b, v4.16b, RTMP0.16b; + eor v5.16b, v5.16b, RTMP1.16b; + eor v6.16b, v6.16b, RTMP2.16b; + eor v7.16b, v7.16b, RTMP3.16b; + st1 {v4.16b-v7.16b}, [x1], #64; + + cbz w4, .Lctr_end; + b .Lctr_loop_blk; + +.Lctr_tail8: + add w4, w4, #8; + cmp w4, #4; + blt .Lctr_tail4; + + sub w4, w4, #4; + + /* construct CTRs */ + inc_le128(v0); /* +0 */ + inc_le128(v1); /* +1 */ + inc_le128(v2); /* +2 */ + inc_le128(v3); /* +3 */ + + SM4_CRYPT_BLK4(v0, v1, v2, v3); + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; + eor v0.16b, v0.16b, RTMP0.16b; + eor v1.16b, v1.16b, RTMP1.16b; + eor v2.16b, v2.16b, RTMP2.16b; + eor v3.16b, v3.16b, RTMP3.16b; + st1 {v0.16b-v3.16b}, [x1], #64; + + cbz w4, .Lctr_end; + +.Lctr_tail4: + sub w4, w4, #1; + + /* construct CTRs */ + inc_le128(v0); + + SM4_CRYPT_BLK(v0); + + ld1 {RTMP0.16b}, [x2], #16; + eor v0.16b, v0.16b, RTMP0.16b; + st1 {v0.16b}, [x1], #16; + + cbnz w4, .Lctr_tail4; + +.Lctr_end: + /* store new CTR */ + rev x7, x7; + rev x8, x8; + stp x7, x8, [x3]; + + ret; +SYM_FUNC_END(sm4_ce_ctr_enc) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c new file mode 100644 index 000000000000..496d55c0d01a --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 Cipher Algorithm, using ARMv8 Crypto Extensions + * as specified in + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + * + * Copyright (C) 2022, Alibaba Group. + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BYTES2BLKS(nbytes) ((nbytes) >> 4) + +asmlinkage void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec, + const u32 *fk, const u32 *ck); +asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); +asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblks); +asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblks); + +static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + return 0; +} + +static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) +{ + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_crypt(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_enc); +} + +static int sm4_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_ecb_do_crypt(req, ctx->rkey_dec); +} + +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cfb_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_cfb_dec(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int sm4_ctr_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int nblks; + + kernel_neon_begin(); + + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } + + kernel_neon_end(); + + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static struct skcipher_alg sm4_algs[] = { + { + .base = { + .cra_name = "ecb(sm4)", + .cra_driver_name = "ecb-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ecb_encrypt, + .decrypt = sm4_ecb_decrypt, + }, { + .base = { + .cra_name = "cbc(sm4)", + .cra_driver_name = "cbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_encrypt, + .decrypt = sm4_cbc_decrypt, + }, { + .base = { + .cra_name = "cfb(sm4)", + .cra_driver_name = "cfb-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_cfb_encrypt, + .decrypt = sm4_cfb_decrypt, + }, { + .base = { + .cra_name = "ctr(sm4)", + .cra_driver_name = "ctr-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_setkey, + .encrypt = sm4_ctr_crypt, + .decrypt = sm4_ctr_crypt, + } +}; + +static int __init sm4_init(void) +{ + return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +static void __exit sm4_exit(void) +{ + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); +} + +module_cpu_feature_match(SM4, sm4_init); +module_exit(sm4_exit); + +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("sm4-ce"); +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("ecb(sm4)"); +MODULE_ALIAS_CRYPTO("cbc(sm4)"); +MODULE_ALIAS_CRYPTO("cfb(sm4)"); +MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); From d5db91d26af5207b18cf2a80d7a7094bd4f57896 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 16 Mar 2022 09:03:01 +0800 Subject: [PATCH 006/116] crypto: engine - Add parameter description in crypto_transfer_request() kernel-doc comment Add the description of @need_pump in crypto_transfer_request() kernel-doc comment to remove warning found by running scripts/kernel-doc, which is caused by using 'make W=1'. crypto/crypto_engine.c:260: warning: Function parameter or member 'need_pump' not described in 'crypto_transfer_request' Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Herbert Xu --- crypto/crypto_engine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 6056a990c9f2..bb8e77077f02 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -253,6 +253,7 @@ static void crypto_pump_work(struct kthread_work *work) * crypto_transfer_request - transfer the new request into the engine queue * @engine: the hardware engine * @req: the request need to be listed into the engine queue + * @need_pump: indicates whether queue the pump of request to kthread_work */ static int crypto_transfer_request(struct crypto_engine *engine, struct crypto_async_request *req, From 4cda2f4a0ee68a23cadfa8cc0fce9af548c29fe2 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Wed, 16 Mar 2022 19:26:03 +0800 Subject: [PATCH 007/116] crypto: hisilicon/qm - optimize the barrier operation A 'dma_wmb' barrier is enough to guarantee previous writes before accessing by acc device in the outer shareable domain. A 'smp_wmb' barrier is enough to guarantee previous writes before accessing by other cpus in the inner shareble domain. Signed-off-by: Hui Tang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 009132333d2b..c5c507f2d779 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -687,13 +687,13 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) if (!IS_ENABLED(CONFIG_ARM64)) { memcpy_toio(fun_base, src, 16); - wmb(); + dma_wmb(); return; } asm volatile("ldp %0, %1, %3\n" "stp %0, %1, %2\n" - "dsb sy\n" + "dmb oshst\n" : "=&r" (tmp0), "=&r" (tmp1), "+Q" (*((char __iomem *)fun_base)) @@ -982,7 +982,7 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset) *addr = 1; /* make sure setup is completed */ - mb(); + smp_wmb(); } static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id) From f16a005cde3b1f31cad5ecba0cc810652c3874ec Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 16 Mar 2022 12:20:09 -0700 Subject: [PATCH 008/116] crypto: x86 - eliminate anonymous module_init & module_exit Eliminate anonymous module_init() and module_exit(), which can lead to confusion or ambiguity when reading System.map, crashes/oops/bugs, or an initcall_debug log. Give each of these init and exit functions unique driver-specific names to eliminate the anonymous names. Example 1: (System.map) ffffffff832fc78c t init ffffffff832fc79e t init ffffffff832fc8f8 t init Example 2: (initcall_debug log) calling init+0x0/0x12 @ 1 initcall init+0x0/0x12 returned 0 after 15 usecs calling init+0x0/0x60 @ 1 initcall init+0x0/0x60 returned 0 after 2 usecs calling init+0x0/0x9a @ 1 initcall init+0x0/0x9a returned 0 after 74 usecs Fixes: 64b94ceae8c1 ("crypto: blowfish - add x86_64 assembly implementation") Fixes: 676a38046f4f ("crypto: camellia-x86_64 - module init/exit functions should be static") Fixes: 0b95ec56ae19 ("crypto: camellia - add assembler implementation for x86_64") Fixes: 56d76c96a9f3 ("crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher") Fixes: b9f535ffe38f ("[CRYPTO] twofish: i586 assembly version") Fixes: ff0a70fe0536 ("crypto: twofish-x86_64-3way - module init/exit functions should be static") Fixes: 8280daad436e ("crypto: twofish - add 3-way parallel x86_64 assembler implemention") Signed-off-by: Randy Dunlap Cc: Jussi Kivilinna Cc: Joachim Fritschi Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish_glue.c | 8 ++++---- arch/x86/crypto/camellia_glue.c | 8 ++++---- arch/x86/crypto/serpent_avx2_glue.c | 8 ++++---- arch/x86/crypto/twofish_glue.c | 8 ++++---- arch/x86/crypto/twofish_glue_3way.c | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index fda6066437aa..ba06322c1e39 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -303,7 +303,7 @@ static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); -static int __init init(void) +static int __init blowfish_init(void) { int err; @@ -327,15 +327,15 @@ static int __init init(void) return err; } -static void __exit fini(void) +static void __exit blowfish_fini(void) { crypto_unregister_alg(&bf_cipher_alg); crypto_unregister_skciphers(bf_skcipher_algs, ARRAY_SIZE(bf_skcipher_algs)); } -module_init(init); -module_exit(fini); +module_init(blowfish_init); +module_exit(blowfish_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 66c435ba9d3d..d45e9c0c42ac 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1377,7 +1377,7 @@ static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); -static int __init init(void) +static int __init camellia_init(void) { int err; @@ -1401,15 +1401,15 @@ static int __init init(void) return err; } -static void __exit fini(void) +static void __exit camellia_fini(void) { crypto_unregister_alg(&camellia_cipher_alg); crypto_unregister_skciphers(camellia_skcipher_algs, ARRAY_SIZE(camellia_skcipher_algs)); } -module_init(init); -module_exit(fini); +module_init(camellia_init); +module_exit(camellia_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index ccf0b5fa4933..347e97f4b713 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -96,7 +96,7 @@ static struct skcipher_alg serpent_algs[] = { static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; -static int __init init(void) +static int __init serpent_avx2_init(void) { const char *feature_name; @@ -115,14 +115,14 @@ static int __init init(void) serpent_simd_algs); } -static void __exit fini(void) +static void __exit serpent_avx2_fini(void) { simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), serpent_simd_algs); } -module_init(init); -module_exit(fini); +module_init(serpent_avx2_init); +module_exit(serpent_avx2_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 77e06c2da83d..f9c4adc27404 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -81,18 +81,18 @@ static struct crypto_alg alg = { } }; -static int __init init(void) +static int __init twofish_glue_init(void) { return crypto_register_alg(&alg); } -static void __exit fini(void) +static void __exit twofish_glue_fini(void) { crypto_unregister_alg(&alg); } -module_init(init); -module_exit(fini); +module_init(twofish_glue_init); +module_exit(twofish_glue_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 3507cf2064f1..90454cf18e0d 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -140,7 +140,7 @@ static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); -static int __init init(void) +static int __init twofish_3way_init(void) { if (!force && is_blacklisted_cpu()) { printk(KERN_INFO @@ -154,13 +154,13 @@ static int __init init(void) ARRAY_SIZE(tf_skciphers)); } -static void __exit fini(void) +static void __exit twofish_3way_fini(void) { crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } -module_init(init); -module_exit(fini); +module_init(twofish_3way_init); +module_exit(twofish_3way_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); From f17f9e9069f20f4400ae0bb3ee830d34104ff8f7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 26 Mar 2022 00:11:59 -0700 Subject: [PATCH 009/116] crypto: testmgr - test in-place en/decryption with two sglists As was established in the thread https://lore.kernel.org/linux-crypto/20220223080400.139367-1-gilad@benyossef.com/T/#u, many crypto API users doing in-place en/decryption don't use the same scatterlist pointers for the source and destination, but rather use separate scatterlists that point to the same memory. This case isn't tested by the self-tests, resulting in bugs. This is the natural usage of the crypto API in some cases, so requiring API users to avoid this usage is not reasonable. Therefore, update the self-tests to start testing this case. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/testmgr.c | 75 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4948201065cc..5801a8f9f713 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -232,6 +232,20 @@ enum finalization_type { FINALIZATION_TYPE_DIGEST, /* use digest() */ }; +/* + * Whether the crypto operation will occur in-place, and if so whether the + * source and destination scatterlist pointers will coincide (req->src == + * req->dst), or whether they'll merely point to two separate scatterlists + * (req->src != req->dst) that reference the same underlying memory. + * + * This is only relevant for algorithm types that support in-place operation. + */ +enum inplace_mode { + OUT_OF_PLACE, + INPLACE_ONE_SGLIST, + INPLACE_TWO_SGLISTS, +}; + #define TEST_SG_TOTAL 10000 /** @@ -265,7 +279,7 @@ struct test_sg_division { * crypto test vector can be tested. * * @name: name of this config, logged for debugging purposes if a test fails - * @inplace: operate on the data in-place, if applicable for the algorithm type? + * @inplace_mode: whether and how to operate on the data in-place, if applicable * @req_flags: extra request_flags, e.g. CRYPTO_TFM_REQ_MAY_SLEEP * @src_divs: description of how to arrange the source scatterlist * @dst_divs: description of how to arrange the dst scatterlist, if applicable @@ -282,7 +296,7 @@ struct test_sg_division { */ struct testvec_config { const char *name; - bool inplace; + enum inplace_mode inplace_mode; u32 req_flags; struct test_sg_division src_divs[XBUFSIZE]; struct test_sg_division dst_divs[XBUFSIZE]; @@ -307,11 +321,16 @@ struct testvec_config { /* Configs for skciphers and aeads */ static const struct testvec_config default_cipher_testvec_configs[] = { { - .name = "in-place", - .inplace = true, + .name = "in-place (one sglist)", + .inplace_mode = INPLACE_ONE_SGLIST, + .src_divs = { { .proportion_of_total = 10000 } }, + }, { + .name = "in-place (two sglists)", + .inplace_mode = INPLACE_TWO_SGLISTS, .src_divs = { { .proportion_of_total = 10000 } }, }, { .name = "out-of-place", + .inplace_mode = OUT_OF_PLACE, .src_divs = { { .proportion_of_total = 10000 } }, }, { .name = "unaligned buffer, offset=1", @@ -349,7 +368,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = { .key_offset = 3, }, { .name = "misaligned splits crossing pages, inplace", - .inplace = true, + .inplace_mode = INPLACE_ONE_SGLIST, .src_divs = { { .proportion_of_total = 7500, @@ -749,18 +768,39 @@ static int build_cipher_test_sglists(struct cipher_test_sglists *tsgls, iov_iter_kvec(&input, WRITE, inputs, nr_inputs, src_total_len); err = build_test_sglist(&tsgls->src, cfg->src_divs, alignmask, - cfg->inplace ? + cfg->inplace_mode != OUT_OF_PLACE ? max(dst_total_len, src_total_len) : src_total_len, &input, NULL); if (err) return err; - if (cfg->inplace) { + /* + * In-place crypto operations can use the same scatterlist for both the + * source and destination (req->src == req->dst), or can use separate + * scatterlists (req->src != req->dst) which point to the same + * underlying memory. Make sure to test both cases. + */ + if (cfg->inplace_mode == INPLACE_ONE_SGLIST) { tsgls->dst.sgl_ptr = tsgls->src.sgl; tsgls->dst.nents = tsgls->src.nents; return 0; } + if (cfg->inplace_mode == INPLACE_TWO_SGLISTS) { + /* + * For now we keep it simple and only test the case where the + * two scatterlists have identical entries, rather than + * different entries that split up the same memory differently. + */ + memcpy(tsgls->dst.sgl, tsgls->src.sgl, + tsgls->src.nents * sizeof(tsgls->src.sgl[0])); + memcpy(tsgls->dst.sgl_saved, tsgls->src.sgl, + tsgls->src.nents * sizeof(tsgls->src.sgl[0])); + tsgls->dst.sgl_ptr = tsgls->dst.sgl; + tsgls->dst.nents = tsgls->src.nents; + return 0; + } + /* Out of place */ return build_test_sglist(&tsgls->dst, cfg->dst_divs[0].proportion_of_total ? cfg->dst_divs : cfg->src_divs, @@ -995,9 +1035,19 @@ static void generate_random_testvec_config(struct testvec_config *cfg, p += scnprintf(p, end - p, "random:"); - if (prandom_u32() % 2 == 0) { - cfg->inplace = true; - p += scnprintf(p, end - p, " inplace"); + switch (prandom_u32() % 4) { + case 0: + case 1: + cfg->inplace_mode = OUT_OF_PLACE; + break; + case 2: + cfg->inplace_mode = INPLACE_ONE_SGLIST; + p += scnprintf(p, end - p, " inplace_one_sglist"); + break; + default: + cfg->inplace_mode = INPLACE_TWO_SGLISTS; + p += scnprintf(p, end - p, " inplace_two_sglists"); + break; } if (prandom_u32() % 2 == 0) { @@ -1034,7 +1084,7 @@ static void generate_random_testvec_config(struct testvec_config *cfg, cfg->req_flags); p += scnprintf(p, end - p, "]"); - if (!cfg->inplace && prandom_u32() % 2 == 0) { + if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32() % 2 == 0) { p += scnprintf(p, end - p, " dst_divs=["); p = generate_random_sgl_divisions(cfg->dst_divs, ARRAY_SIZE(cfg->dst_divs), @@ -2085,7 +2135,8 @@ static int test_aead_vec_cfg(int enc, const struct aead_testvec *vec, /* Check for the correct output (ciphertext or plaintext) */ err = verify_correct_output(&tsgls->dst, enc ? vec->ctext : vec->ptext, enc ? vec->clen : vec->plen, - vec->alen, enc || !cfg->inplace); + vec->alen, + enc || cfg->inplace_mode == OUT_OF_PLACE); if (err == -EOVERFLOW) { pr_err("alg: aead: %s %s overran dst buffer on test vector %s, cfg=\"%s\"\n", driver, op, vec_name, cfg->name); From cac32cd4f1436b0f926a9112039d3f7ce1cd6cab Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 31 Mar 2022 16:12:10 -0500 Subject: [PATCH 010/116] crypto: ccp - cache capability into psp device The results of the capability register will be used by future code at runtime rather than just initialization. Acked-by: Tom Lendacky Signed-off-by: Mario Limonciello Signed-off-by: Herbert Xu --- drivers/crypto/ccp/psp-dev.c | 37 +++++++++++++++++------------------- drivers/crypto/ccp/psp-dev.h | 5 +++++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index ae7b44599914..8cd404121cd5 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -70,17 +70,17 @@ static unsigned int psp_get_capability(struct psp_device *psp) */ if (val == 0xffffffff) { dev_notice(psp->dev, "psp: unable to access the device: you might be running a broken BIOS.\n"); - return 0; + return -ENODEV; } + psp->capability = val; - return val; + return 0; } -static int psp_check_sev_support(struct psp_device *psp, - unsigned int capability) +static int psp_check_sev_support(struct psp_device *psp) { /* Check if device supports SEV feature */ - if (!(capability & 1)) { + if (!(psp->capability & PSP_CAPABILITY_SEV)) { dev_dbg(psp->dev, "psp does not support SEV\n"); return -ENODEV; } @@ -88,11 +88,10 @@ static int psp_check_sev_support(struct psp_device *psp, return 0; } -static int psp_check_tee_support(struct psp_device *psp, - unsigned int capability) +static int psp_check_tee_support(struct psp_device *psp) { /* Check if device supports TEE feature */ - if (!(capability & 2)) { + if (!(psp->capability & PSP_CAPABILITY_TEE)) { dev_dbg(psp->dev, "psp does not support TEE\n"); return -ENODEV; } @@ -100,11 +99,10 @@ static int psp_check_tee_support(struct psp_device *psp, return 0; } -static int psp_check_support(struct psp_device *psp, - unsigned int capability) +static int psp_check_support(struct psp_device *psp) { - int sev_support = psp_check_sev_support(psp, capability); - int tee_support = psp_check_tee_support(psp, capability); + int sev_support = psp_check_sev_support(psp); + int tee_support = psp_check_tee_support(psp); /* Return error if device neither supports SEV nor TEE */ if (sev_support && tee_support) @@ -113,17 +111,17 @@ static int psp_check_support(struct psp_device *psp, return 0; } -static int psp_init(struct psp_device *psp, unsigned int capability) +static int psp_init(struct psp_device *psp) { int ret; - if (!psp_check_sev_support(psp, capability)) { + if (!psp_check_sev_support(psp)) { ret = sev_dev_init(psp); if (ret) return ret; } - if (!psp_check_tee_support(psp, capability)) { + if (!psp_check_tee_support(psp)) { ret = tee_dev_init(psp); if (ret) return ret; @@ -136,7 +134,6 @@ int psp_dev_init(struct sp_device *sp) { struct device *dev = sp->dev; struct psp_device *psp; - unsigned int capability; int ret; ret = -ENOMEM; @@ -155,11 +152,11 @@ int psp_dev_init(struct sp_device *sp) psp->io_regs = sp->io_map; - capability = psp_get_capability(psp); - if (!capability) + ret = psp_get_capability(psp); + if (ret) goto e_disable; - ret = psp_check_support(psp, capability); + ret = psp_check_support(psp); if (ret) goto e_disable; @@ -174,7 +171,7 @@ int psp_dev_init(struct sp_device *sp) goto e_err; } - ret = psp_init(psp, capability); + ret = psp_init(psp); if (ret) goto e_irq; diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h index ef38e4135d81..d811da28cce6 100644 --- a/drivers/crypto/ccp/psp-dev.h +++ b/drivers/crypto/ccp/psp-dev.h @@ -45,6 +45,8 @@ struct psp_device { void *sev_data; void *tee_data; + + unsigned int capability; }; void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, @@ -57,4 +59,7 @@ void psp_clear_tee_irq_handler(struct psp_device *psp); struct psp_device *psp_get_master_device(void); +#define PSP_CAPABILITY_SEV BIT(0) +#define PSP_CAPABILITY_TEE BIT(1) + #endif /* __PSP_DEV_H */ From 50c4decc1b15313afa31f9a99da0904fa9c9b071 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 31 Mar 2022 16:12:11 -0500 Subject: [PATCH 011/116] crypto: ccp - Export PSP security bits to userspace The PSP sets several pre-defined bits in the capabilities register to indicate that security attributes of the platform. Export these attributes into userspace for administrators to confirm platform is properly locked down. Acked-by: Tom Lendacky Signed-off-by: Mario Limonciello Signed-off-by: Herbert Xu --- Documentation/ABI/testing/sysfs-driver-ccp | 87 ++++++++++++++++++++++ drivers/crypto/ccp/psp-dev.h | 17 +++++ drivers/crypto/ccp/sp-pci.c | 62 +++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-ccp diff --git a/Documentation/ABI/testing/sysfs-driver-ccp b/Documentation/ABI/testing/sysfs-driver-ccp new file mode 100644 index 000000000000..7aded9b75553 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-ccp @@ -0,0 +1,87 @@ +What: /sys/bus/pci/devices//fused_part +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//fused_part file reports + whether the CPU or APU has been fused to prevent tampering. + 0: Not fused + 1: Fused + +What: /sys/bus/pci/devices//debug_lock_on +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//debug_lock_on reports + whether the AMD CPU or APU has been unlocked for debugging. + Possible values: + 0: Not locked + 1: Locked + +What: /sys/bus/pci/devices//tsme_status +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//tsme_status file reports + the status of transparent secure memory encryption on AMD systems. + Possible values: + 0: Not active + 1: Active + +What: /sys/bus/pci/devices//anti_rollback_status +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//anti_rollback_status file reports + whether the PSP is enforcing rollback protection. + Possible values: + 0: Not enforcing + 1: Enforcing + +What: /sys/bus/pci/devices//rpmc_production_enabled +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//rpmc_production_enabled file reports + whether Replay Protected Monotonic Counter support has been enabled. + Possible values: + 0: Not enabled + 1: Enabled + +What: /sys/bus/pci/devices//rpmc_spirom_available +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//rpmc_spirom_available file reports + whether an Replay Protected Monotonic Counter supported SPI is installed + on the system. + Possible values: + 0: Not present + 1: Present + +What: /sys/bus/pci/devices//hsp_tpm_available +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//hsp_tpm_available file reports + whether the HSP TPM has been activated. + Possible values: + 0: Not activated or present + 1: Activated + +What: /sys/bus/pci/devices//rom_armor_enforced +Date: June 2022 +KernelVersion: 5.19 +Contact: mario.limonciello@amd.com +Description: + The /sys/bus/pci/devices//rom_armor_enforced file reports + whether RomArmor SPI protection is enforced. + Possible values: + 0: Not enforced + 1: Enforced diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h index d811da28cce6..d528eb04c3ef 100644 --- a/drivers/crypto/ccp/psp-dev.h +++ b/drivers/crypto/ccp/psp-dev.h @@ -61,5 +61,22 @@ struct psp_device *psp_get_master_device(void); #define PSP_CAPABILITY_SEV BIT(0) #define PSP_CAPABILITY_TEE BIT(1) +#define PSP_CAPABILITY_PSP_SECURITY_REPORTING BIT(7) + +#define PSP_CAPABILITY_PSP_SECURITY_OFFSET 8 +/* + * The PSP doesn't directly store these bits in the capability register + * but instead copies them from the results of query command. + * + * The offsets from the query command are below, and shifted when used. + */ +#define PSP_SECURITY_FUSED_PART BIT(0) +#define PSP_SECURITY_DEBUG_LOCK_ON BIT(2) +#define PSP_SECURITY_TSME_STATUS BIT(5) +#define PSP_SECURITY_ANTI_ROLLBACK_STATUS BIT(7) +#define PSP_SECURITY_RPMC_PRODUCTION_ENABLED BIT(8) +#define PSP_SECURITY_RPMC_SPIROM_AVAILABLE BIT(9) +#define PSP_SECURITY_HSP_TPM_AVAILABLE BIT(10) +#define PSP_SECURITY_ROM_ARMOR_ENFORCED BIT(11) #endif /* __PSP_DEV_H */ diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 88c672ad27e4..b5970ae54d0e 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -32,6 +32,67 @@ struct sp_pci { }; static struct sp_device *sp_dev_master; +#define attribute_show(name, def) \ +static ssize_t name##_show(struct device *d, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct sp_device *sp = dev_get_drvdata(d); \ + struct psp_device *psp = sp->psp_data; \ + int bit = PSP_SECURITY_##def << PSP_CAPABILITY_PSP_SECURITY_OFFSET; \ + return sysfs_emit(buf, "%d\n", (psp->capability & bit) > 0); \ +} + +attribute_show(fused_part, FUSED_PART) +static DEVICE_ATTR_RO(fused_part); +attribute_show(debug_lock_on, DEBUG_LOCK_ON) +static DEVICE_ATTR_RO(debug_lock_on); +attribute_show(tsme_status, TSME_STATUS) +static DEVICE_ATTR_RO(tsme_status); +attribute_show(anti_rollback_status, ANTI_ROLLBACK_STATUS) +static DEVICE_ATTR_RO(anti_rollback_status); +attribute_show(rpmc_production_enabled, RPMC_PRODUCTION_ENABLED) +static DEVICE_ATTR_RO(rpmc_production_enabled); +attribute_show(rpmc_spirom_available, RPMC_SPIROM_AVAILABLE) +static DEVICE_ATTR_RO(rpmc_spirom_available); +attribute_show(hsp_tpm_available, HSP_TPM_AVAILABLE) +static DEVICE_ATTR_RO(hsp_tpm_available); +attribute_show(rom_armor_enforced, ROM_ARMOR_ENFORCED) +static DEVICE_ATTR_RO(rom_armor_enforced); + +static struct attribute *psp_attrs[] = { + &dev_attr_fused_part.attr, + &dev_attr_debug_lock_on.attr, + &dev_attr_tsme_status.attr, + &dev_attr_anti_rollback_status.attr, + &dev_attr_rpmc_production_enabled.attr, + &dev_attr_rpmc_spirom_available.attr, + &dev_attr_hsp_tpm_available.attr, + &dev_attr_rom_armor_enforced.attr, + NULL +}; + +static umode_t psp_security_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct sp_device *sp = dev_get_drvdata(dev); + struct psp_device *psp = sp->psp_data; + + if (psp && (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING)) + return 0444; + + return 0; +} + +static struct attribute_group psp_attr_group = { + .attrs = psp_attrs, + .is_visible = psp_security_is_visible, +}; + +static const struct attribute_group *psp_groups[] = { + &psp_attr_group, + NULL, +}; + static int sp_get_msix_irqs(struct sp_device *sp) { struct sp_pci *sp_pci = sp->dev_specific; @@ -391,6 +452,7 @@ static struct pci_driver sp_pci_driver = { .remove = sp_pci_remove, .shutdown = sp_pci_shutdown, .driver.pm = &sp_pci_pm_ops, + .dev_groups = psp_groups, }; int sp_pci_init(void) From 84ee393b1e82628ac7f183d8a68d8ac2cf0ed876 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 31 Mar 2022 16:12:12 -0500 Subject: [PATCH 012/116] crypto: ccp - Allow PSP driver to load without SEV/TEE support Previously the PSP probe routine would fail if both SEV and TEE were missing. This is possibly the case for some client parts. As capabilities can now be accessed from userspace, it may still be useful to have the PSP driver finish loading so that those capabilities can be read. Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/psp-dev.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 8cd404121cd5..a3b7b5130be4 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -99,18 +99,6 @@ static int psp_check_tee_support(struct psp_device *psp) return 0; } -static int psp_check_support(struct psp_device *psp) -{ - int sev_support = psp_check_sev_support(psp); - int tee_support = psp_check_tee_support(psp); - - /* Return error if device neither supports SEV nor TEE */ - if (sev_support && tee_support) - return -ENODEV; - - return 0; -} - static int psp_init(struct psp_device *psp) { int ret; @@ -156,10 +144,6 @@ int psp_dev_init(struct sp_device *sp) if (ret) goto e_disable; - ret = psp_check_support(psp); - if (ret) - goto e_disable; - /* Disable and clear interrupts until ready */ iowrite32(0, psp->io_regs + psp->vdata->inten_reg); iowrite32(-1, psp->io_regs + psp->vdata->intsts_reg); From 4e2c87949f2b9909d3daa8d9cd4b6d5077b6e0c2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 31 Mar 2022 16:12:13 -0500 Subject: [PATCH 013/116] crypto: ccp - When TSME and SME both detected notify user CC_ATTR_HOST_MEM_ENCRYPT is used to relay that memory encryption has been activated by the kernel. As it's technically possible to enable both SME and TSME at the same time, detect this scenario and notify the user that enabling TSME and SME at the same time is unnecessary. Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/psp-dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index a3b7b5130be4..c9c741ac8442 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -74,6 +74,12 @@ static unsigned int psp_get_capability(struct psp_device *psp) } psp->capability = val; + /* Detect if TSME and SME are both enabled */ + if (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING && + psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) && + cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n"); + return 0; } From 7b2206d8809259b0dd297f0ccf017bf2dea98a02 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Thu, 31 Mar 2022 23:59:10 +0200 Subject: [PATCH 014/116] crypto: cavium/nitrox - remove check of list iterator against head past the loop body When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Signed-off-by: Herbert Xu --- drivers/crypto/cavium/nitrox/nitrox_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 6c61817996a3..432a61aca0c5 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -269,15 +269,17 @@ static void nitrox_remove_from_devlist(struct nitrox_device *ndev) struct nitrox_device *nitrox_get_first_device(void) { - struct nitrox_device *ndev; + struct nitrox_device *ndev = NULL, *iter; mutex_lock(&devlist_lock); - list_for_each_entry(ndev, &ndevlist, list) { - if (nitrox_ready(ndev)) + list_for_each_entry(iter, &ndevlist, list) { + if (nitrox_ready(iter)) { + ndev = iter; break; + } } mutex_unlock(&devlist_lock); - if (&ndev->list == &ndevlist) + if (!ndev) return NULL; refcount_inc(&ndev->refcnt); From c79c09ad31e2c0a5ba0166b284f433b95d9c4c18 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2022 13:25:11 +0100 Subject: [PATCH 015/116] crypto: qat - stop using iommu_present() Even if an IOMMU might be present for some PCI segment in the system, that doesn't necessarily mean it provides translation for the device we care about. Replace iommu_present() with a more appropriate check. Signed-off-by: Robin Murphy Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_sriov.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index b960bca1f9d2..7f9c18dc4540 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -3,7 +3,6 @@ #include #include #include -#include #include "adf_common_drv.h" #include "adf_cfg.h" #include "adf_pfvf_pf_msg.h" @@ -176,7 +175,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) return -EFAULT; } - if (!iommu_present(&pci_bus_type)) + if (!device_iommu_mapped(&pdev->dev)) dev_warn(&pdev->dev, "IOMMU should be enabled for SR-IOV to work correctly\n"); if (accel_dev->pf.vf_info) { From 476c9ab75976c3aea1f7f05e1ebbd1219a0f11bb Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 6 Apr 2022 11:11:38 +0300 Subject: [PATCH 016/116] crypto: ccree - rearrange init calls to avoid race Rearrange init calls to avoid the rare race condition of the cipher algs being registered and used while we still init the hash code which uses the HW without proper lock. Signed-off-by: Gilad Ben-Yossef Reported-by: Dung Nguyen Tested-by: Jing Dan Tested-by: Dung Nguyen Fixes: 63893811b0fc("crypto: ccree - add ahash support") Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 790fa9058a36..7d1bee86d581 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -529,24 +529,26 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_req_mgr_err; } + /* hash must be allocated first due to use of send_request_init() + * and dependency of AEAD on it + */ + rc = cc_hash_alloc(new_drvdata); + if (rc) { + dev_err(dev, "cc_hash_alloc failed\n"); + goto post_buf_mgr_err; + } + /* Allocate crypto algs */ rc = cc_cipher_alloc(new_drvdata); if (rc) { dev_err(dev, "cc_cipher_alloc failed\n"); - goto post_buf_mgr_err; - } - - /* hash must be allocated before aead since hash exports APIs */ - rc = cc_hash_alloc(new_drvdata); - if (rc) { - dev_err(dev, "cc_hash_alloc failed\n"); - goto post_cipher_err; + goto post_hash_err; } rc = cc_aead_alloc(new_drvdata); if (rc) { dev_err(dev, "cc_aead_alloc failed\n"); - goto post_hash_err; + goto post_cipher_err; } /* If we got here and FIPS mode is enabled @@ -558,10 +560,10 @@ static int init_cc_resources(struct platform_device *plat_dev) pm_runtime_put(dev); return 0; -post_hash_err: - cc_hash_free(new_drvdata); post_cipher_err: cc_cipher_free(new_drvdata); +post_hash_err: + cc_hash_free(new_drvdata); post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: @@ -593,8 +595,8 @@ static void cleanup_cc_resources(struct platform_device *plat_dev) (struct cc_drvdata *)platform_get_drvdata(plat_dev); cc_aead_free(drvdata); - cc_hash_free(drvdata); cc_cipher_free(drvdata); + cc_hash_free(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); cc_fips_fini(drvdata); From a260436c98171cd825955a84a7f6e62bc8f4f00d Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 6 Apr 2022 11:11:39 +0300 Subject: [PATCH 017/116] crypto: ccree - use fine grained DMA mapping dir Use a fine grained specification of DMA mapping directions in certain cases, allowing both a more optimized operation as well as shushing out a harmless, though persky dma-debug warning. Signed-off-by: Gilad Ben-Yossef Reported-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 11e0278c8631..6140e4927322 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx, req_ctx->mlli_params.mlli_dma_addr); } - dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); - if (src != dst) { - dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE); dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst)); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + } else { + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); } } @@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; int rc = 0; u32 mapped_nents = 0; + int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); req_ctx->dma_buf_type = CC_DMA_BUF_DLLI; mlli_params->curr_pool = NULL; @@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } /* Map the src SGL */ - rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents, + rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) goto cipher_exit; @@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } } else { /* Map the dst sg */ - rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE, &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) @@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); + int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); if (areq_ctx->mac_buf_dma_addr) { dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr, @@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE); } if (drvdata->coherent && areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT && @@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, else size_for_map -= authsize; - rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE, &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); @@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) size_to_map += authsize; } - rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->src, size_to_map, + (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL), &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), From 6a23804cb8bcb85c6998bf193d94d4036db26f51 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 7 Apr 2022 17:54:40 +0100 Subject: [PATCH 018/116] crypto: qat - set CIPHER capability for DH895XCC Set the CIPHER capability for QAT DH895XCC devices if the hardware supports it. This is done if both the CIPHER and the AUTHENTICATION engines are available on the device. Fixes: ad1332aa67ec ("crypto: qat - add support for capability detection") Signed-off-by: Giovanni Cabiddu Signed-off-by: Marco Chiappero Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- .../crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 09599fe4d2f3..ff13047772e3 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -58,17 +58,23 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | - ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + ICP_ACCEL_CAPABILITIES_AUTHENTICATION | + ICP_ACCEL_CAPABILITIES_CIPHER; /* Read accelerator capabilities mask */ pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses); - if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) + /* A set bit in legfuses means the feature is OFF in this SKU */ + if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) { capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } if (legfuses & ICP_ACCEL_MASK_PKE_SLICE) capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; - if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) + if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) { capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } if (legfuses & ICP_ACCEL_MASK_COMPRESS_SLICE) capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION; From 0eaa51543273fd0f4ba9bea83638f7033436e5eb Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 7 Apr 2022 17:54:41 +0100 Subject: [PATCH 019/116] crypto: qat - set COMPRESSION capability for DH895XCC The capability detection logic clears bits for the features that are disabled in a certain SKU. For example, if the bit associate to compression is not present in the LEGFUSE register, the correspondent bit is cleared in the capability mask. This change adds the compression capability to the mask as this was missing in the commit that enhanced the capability detection logic. Fixes: cfe4894eccdc ("crypto: qat - set COMPRESSION capability for QAT GEN2") Signed-off-by: Giovanni Cabiddu Signed-off-by: Marco Chiappero Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index ff13047772e3..61d5467e0d92 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -59,7 +59,8 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | ICP_ACCEL_CAPABILITIES_AUTHENTICATION | - ICP_ACCEL_CAPABILITIES_CIPHER; + ICP_ACCEL_CAPABILITIES_CIPHER | + ICP_ACCEL_CAPABILITIES_COMPRESSION; /* Read accelerator capabilities mask */ pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses); From 9ff9139b5ddbd4d3ea93558a2d477a6bab4eff94 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:42 +0100 Subject: [PATCH 020/116] crypto: qat - fix ETR sources enabled by default on GEN2 devices When the driver starts the device, it enables all the necessary interrupts. However interrupts associated to host rings are enabled by default on all GEN2 devices (except for dh895x) even when SR-IOV is active. Fix this behaviour by checking if data structures associated to VFs have been allocated to determine whether to enable such interrupts or not. Since the logic for the fix is the same across GEN2 devices, replace the function to be fixed (adf_enable_ints()) with a single one (adf_gen2_enable_ints()) in the common GEN2 code in adf_gen2_hw_data.c. Likewise, remove the unnecessary duplication of defines too. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 15 +-------------- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h | 4 ---- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 15 +-------------- drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h | 4 ---- drivers/crypto/qat/qat_common/adf_gen2_hw_data.c | 13 +++++++++++++ drivers/crypto/qat/qat_common/adf_gen2_hw_data.h | 6 ++++++ .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 16 +--------------- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.h | 4 ---- 8 files changed, 22 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index b941fe3713ff..50d5afa26a9b 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -78,19 +78,6 @@ static const u32 *adf_get_arbiter_mapping(void) return thrd_to_arb_map; } -static void adf_enable_ints(struct adf_accel_dev *accel_dev) -{ - void __iomem *addr; - - addr = (&GET_BARS(accel_dev)[ADF_C3XXX_PMISC_BAR])->virt_addr; - - /* Enable bundle and misc interrupts */ - ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF0_MASK_OFFSET, - ADF_C3XXX_SMIA0_MASK); - ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF1_MASK_OFFSET, - ADF_C3XXX_SMIA1_MASK); -} - static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) { adf_gen2_cfg_iov_thds(accel_dev, enable, @@ -133,7 +120,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; - hw_data->enable_ints = adf_enable_ints; + hw_data->enable_ints = adf_gen2_enable_ints; hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h index 1b86f828725c..336a06f11dbd 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h @@ -13,10 +13,6 @@ #define ADF_C3XXX_ACCELERATORS_MASK 0x7 #define ADF_C3XXX_ACCELENGINES_MASK 0x3F #define ADF_C3XXX_ETR_MAX_BANKS 16 -#define ADF_C3XXX_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28) -#define ADF_C3XXX_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) -#define ADF_C3XXX_SMIA0_MASK 0xFFFF -#define ADF_C3XXX_SMIA1_MASK 0x1 #define ADF_C3XXX_SOFTSTRAP_CSR_OFFSET 0x2EC /* AE to function mapping */ diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index b1eac2f81faa..c00386fe6587 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -80,19 +80,6 @@ static const u32 *adf_get_arbiter_mapping(void) return thrd_to_arb_map; } -static void adf_enable_ints(struct adf_accel_dev *accel_dev) -{ - void __iomem *addr; - - addr = (&GET_BARS(accel_dev)[ADF_C62X_PMISC_BAR])->virt_addr; - - /* Enable bundle and misc interrupts */ - ADF_CSR_WR(addr, ADF_C62X_SMIAPF0_MASK_OFFSET, - ADF_C62X_SMIA0_MASK); - ADF_CSR_WR(addr, ADF_C62X_SMIAPF1_MASK_OFFSET, - ADF_C62X_SMIA1_MASK); -} - static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) { adf_gen2_cfg_iov_thds(accel_dev, enable, @@ -135,7 +122,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; - hw_data->enable_ints = adf_enable_ints; + hw_data->enable_ints = adf_gen2_enable_ints; hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h index 68c3436bd3aa..008c0a3a9769 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h @@ -13,10 +13,6 @@ #define ADF_C62X_ACCELERATORS_MASK 0x1F #define ADF_C62X_ACCELENGINES_MASK 0x3FF #define ADF_C62X_ETR_MAX_BANKS 16 -#define ADF_C62X_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28) -#define ADF_C62X_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) -#define ADF_C62X_SMIA0_MASK 0xFFFF -#define ADF_C62X_SMIA1_MASK 0x1 #define ADF_C62X_SOFTSTRAP_CSR_OFFSET 0x2EC /* AE to function mapping */ diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c index 57035b7dd4b2..d1884547b5a1 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c @@ -98,6 +98,19 @@ void adf_gen2_get_arb_info(struct arb_info *arb_info) } EXPORT_SYMBOL_GPL(adf_gen2_get_arb_info); +void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev) +{ + void __iomem *addr = adf_get_pmisc_base(accel_dev); + u32 val; + + val = accel_dev->pf.vf_info ? 0 : BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1; + + /* Enable bundle and misc interrupts */ + ADF_CSR_WR(addr, ADF_GEN2_SMIAPF0_MASK_OFFSET, val); + ADF_CSR_WR(addr, ADF_GEN2_SMIAPF1_MASK_OFFSET, ADF_GEN2_SMIA1_MASK); +} +EXPORT_SYMBOL_GPL(adf_gen2_enable_ints); + static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) { return BUILD_RING_BASE_ADDR(addr, size); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h index f2e0451b11c0..e4bc07529be4 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h +++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h @@ -145,6 +145,11 @@ do { \ #define ADF_GEN2_CERRSSMSH(i) ((i) * 0x4000 + 0x10) #define ADF_GEN2_ERRSSMSH_EN BIT(3) +/* Interrupts */ +#define ADF_GEN2_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28) +#define ADF_GEN2_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) +#define ADF_GEN2_SMIA1_MASK 0x1 + u32 adf_gen2_get_num_accels(struct adf_hw_device_data *self); u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self); void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev); @@ -153,6 +158,7 @@ void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info); void adf_gen2_get_arb_info(struct arb_info *arb_info); +void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev); u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev); void adf_gen2_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 61d5467e0d92..7375436ac1b8 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -107,20 +107,6 @@ static const u32 *adf_get_arbiter_mapping(void) return thrd_to_arb_map; } -static void adf_enable_ints(struct adf_accel_dev *accel_dev) -{ - void __iomem *addr; - - addr = (&GET_BARS(accel_dev)[ADF_DH895XCC_PMISC_BAR])->virt_addr; - - /* Enable bundle and misc interrupts */ - ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF0_MASK_OFFSET, - accel_dev->pf.vf_info ? 0 : - BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1); - ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF1_MASK_OFFSET, - ADF_DH895XCC_SMIA1_MASK); -} - static u32 get_vf2pf_sources(void __iomem *pmisc_bar) { u32 errsou3, errmsk3, errsou5, errmsk5, vf_int_mask; @@ -222,7 +208,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; - hw_data->enable_ints = adf_enable_ints; + hw_data->enable_ints = adf_gen2_enable_ints; hw_data->reset_device = adf_reset_sbr; hw_data->disable_iov = adf_disable_sriov; diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h index aa17272a1507..7b674bbe4192 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h @@ -19,10 +19,6 @@ #define ADF_DH895XCC_ACCELERATORS_MASK 0x3F #define ADF_DH895XCC_ACCELENGINES_MASK 0xFFF #define ADF_DH895XCC_ETR_MAX_BANKS 32 -#define ADF_DH895XCC_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28) -#define ADF_DH895XCC_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30) -#define ADF_DH895XCC_SMIA0_MASK 0xFFFFFFFF -#define ADF_DH895XCC_SMIA1_MASK 0x1 /* Masks for VF2PF interrupts */ #define ADF_DH895XCC_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9) From 992ec1fa86919933bfe2963473ef291b2031556d Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:43 +0100 Subject: [PATCH 021/116] crypto: qat - remove unneeded braces Remove unnecessary braces around a single statement in a for loop. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_sriov.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 7f9c18dc4540..887d95ec1f48 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -116,9 +116,8 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) if (hw_data->configure_iov_threads) hw_data->configure_iov_threads(accel_dev, false); - for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { + for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) mutex_destroy(&vf->pf2vf_lock); - } kfree(accel_dev->pf.vf_info); accel_dev->pf.vf_info = NULL; From 80280aeb2d51140f61ebd88343cd30daf22b4dc2 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 7 Apr 2022 17:54:44 +0100 Subject: [PATCH 022/116] crypto: qat - remove unused PFVF stubs The functions adf_enable_pf2vf_interrupts(), adf_flush_vf_wq() and adf_disable_pf2vf_interrupts() are not referenced when the driver is compiled with CONFIG_PCI_IOV=n. This patch removes these unused stubs. Signed-off-by: Giovanni Cabiddu Signed-off-by: Marco Chiappero Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_common_drv.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index e8c9b77c0d66..feecf1035a90 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -217,14 +217,6 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev) { } -static inline void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) -{ -} - -static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) -{ -} - static inline int adf_init_pf_wq(void) { return 0; @@ -243,10 +235,6 @@ static inline void adf_exit_vf_wq(void) { } -static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev) -{ -} - #endif static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev) From f9f8f2b7415705448118102764076504c0850082 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:45 +0100 Subject: [PATCH 023/116] crypto: qat - remove unnecessary tests to detect PFVF support Previously, the GEN4 host driver supported SR-IOV but had no working implementation of the PFVF protocol to communicate with VF drivers. Since all the host drivers for QAT devices now support both SR-IOV and PFVF, remove the old and unnecessary checks to test PFVF support. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_sriov.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 887d95ec1f48..8e8421a46b54 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -73,8 +73,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) hw_data->configure_iov_threads(accel_dev, true); /* Enable VF to PF interrupts for all VFs */ - if (hw_data->pfvf_ops.get_pf2vf_offset) - adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1); + adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1); /* * Due to the hardware design, when SR-IOV and the ring arbiter @@ -103,14 +102,11 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) if (!accel_dev->pf.vf_info) return; - if (hw_data->pfvf_ops.get_pf2vf_offset) - adf_pf2vf_notify_restarting(accel_dev); - + adf_pf2vf_notify_restarting(accel_dev); pci_disable_sriov(accel_to_pci_dev(accel_dev)); /* Disable VF to PF interrupts */ - if (hw_data->pfvf_ops.get_pf2vf_offset) - adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0)); + adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0)); /* Clear Valid bits in AE Thread to PCIe Function Mapping */ if (hw_data->configure_iov_threads) From 569b462e6604fab6ec4dc6649d06ac62564567ce Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:46 +0100 Subject: [PATCH 024/116] crypto: qat - add missing restarting event notification in VFs VF drivers are notified via PFVF of the VFs being disabled, but such notification was not propagated within the VF driver. Dispatch the ADF_EVENT_RESTARTING event by adding a missing call to adf_dev_restarting_notify(). Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_vf_isr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 86c3bd0c9c2b..8c95fcd8e64b 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -70,6 +70,7 @@ static void adf_dev_stop_async(struct work_struct *work) container_of(work, struct adf_vf_stop_data, work); struct adf_accel_dev *accel_dev = stop_data->accel_dev; + adf_dev_restarting_notify(accel_dev); adf_dev_stop(accel_dev); adf_dev_shutdown(accel_dev); From 4b61d2bd346de12bb62668ae0dc2f332643067a3 Mon Sep 17 00:00:00 2001 From: Wojciech Ziemba Date: Thu, 7 Apr 2022 17:54:47 +0100 Subject: [PATCH 025/116] crypto: qat - add check for invalid PFVF protocol version 0 PFVF protocol version 0 is not a valid version, but PF drivers currently would report any such version from VFs as compatible. This patch adds an extra check for the invalid PFVF protocol version 0. Signed-off-by: Wojciech Ziemba Signed-off-by: Marco Chiappero Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c index 588352de1ef0..c059b98836aa 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c @@ -242,7 +242,9 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, "VersionRequest received from VF%d (vers %d) to PF (vers %d)\n", vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); - if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) + if (vf_compat_ver == 0) + compat = ADF_PF2VF_VF_INCOMPATIBLE; + else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) compat = ADF_PF2VF_VF_COMPATIBLE; else compat = ADF_PF2VF_VF_COMPAT_UNKNOWN; From 27c0f3a14f9fd16eed4e0167cf58225ca28ab4f8 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:48 +0100 Subject: [PATCH 026/116] crypto: qat - test PFVF registers for spurious interrupts on GEN4 Spurious PFVF interrupts can happen when either the ISR is invoked without a valid source being set or, otherwise, when no interrupt bit is set in the PFVF register containing the message. The latter test was present for GEN2 devices but missing for GEN4, this patch fills the gap. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index d80d493a7756..f7860bf612da 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -96,10 +96,16 @@ static struct pfvf_message adf_gen4_pfvf_recv(struct adf_accel_dev *accel_dev, u32 pfvf_offset, u8 compat_ver) { void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); + struct pfvf_message msg = { 0 }; u32 csr_val; /* Read message from the CSR */ csr_val = ADF_CSR_RD(pmisc_addr, pfvf_offset); + if (!(csr_val & ADF_PFVF_INT)) { + dev_info(&GET_DEV(accel_dev), + "Spurious PFVF interrupt, msg 0x%.8x. Ignored\n", csr_val); + return msg; + } /* We can now acknowledge the message reception by clearing the * interrupt bit From 2ca1e0a7fafa65fc6bd8d0236146f8fb8e8a3f81 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:49 +0100 Subject: [PATCH 027/116] crypto: qat - fix wording and formatting in code comment Remove an unintentional extra space and improve the readability of a PFVF related code comment. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pfvf_msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/qat/qat_common/adf_pfvf_msg.h index 9c37a2661392..204a42438992 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_msg.h +++ b/drivers/crypto/qat/qat_common/adf_pfvf_msg.h @@ -8,8 +8,8 @@ /* * PF<->VF Gen2 Messaging format * - * The PF has an array of 32-bit PF2VF registers, one for each VF. The - * PF can access all these registers; each VF can access only the one + * The PF has an array of 32-bit PF2VF registers, one for each VF. The + * PF can access all these registers while each VF can access only the one * register associated with that particular VF. * * The register functionally is split into two parts: From dd3d081b7ea6754913222ed0313fcf644edcc7e6 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:50 +0100 Subject: [PATCH 028/116] crypto: qat - fix off-by-one error in PFVF debug print PFVF Block Message requests for CRC use 0-based values to indicate amounts, which have to be remapped to 1-based values on the receiving side. This patch fixes one debug print which was however using the wire value. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c index c059b98836aa..388e58bcbcaf 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c @@ -154,7 +154,7 @@ static struct pfvf_message handle_blkmsg_req(struct adf_accel_vf_info *vf_info, if (FIELD_GET(ADF_VF2PF_BLOCK_CRC_REQ_MASK, req.data)) { dev_dbg(&GET_DEV(vf_info->accel_dev), "BlockMsg of type %d for CRC over %d bytes received from VF%d\n", - blk_type, blk_byte, vf_info->vf_nr); + blk_type, blk_byte + 1, vf_info->vf_nr); if (!adf_pf2vf_blkmsg_get_data(vf_info, blk_type, blk_byte, byte_max, &resp_data, From c690c7f6312ce69b426af08ae1da2b9e48a0246f Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:51 +0100 Subject: [PATCH 029/116] crypto: qat - rework the VF2PF interrupt handling logic Change the VF2PF interrupt handler in the PF ISR and the definition of the internal PFVF API to correct the current implementation, which can result in missed interrupts. More specifically, current HW generations consider a write to the mask register, regardless of the value, as an acknowledge of any pending VF2PF interrupt. Therefore, if there is an interrupt between the source register read and the mask register write, such interrupt will not be delivered and silently acknowledged, resulting in a lost VF2PF message. To work around the problem, rather than disabling specific interrupts, disable all the interrupts and re-enable only the ones that we are not serving (excluding the already disabled ones too). This will force any other pending interrupt to be triggered and be serviced by a subsequent ISR. This new approach requires, however, changes to the interrupt related pfvf_ops functions. In particular, get_vf2pf_sources() has now been removed in favor of disable_pending_vf2pf_interrupts(), which not only retrieves and returns the pending (and enabled) sources, but also disables them. As a consequence, introduce the adf_disable_pending_vf2pf_interrupts() utility in place of adf_disable_vf2pf_interrupts_irq(), which is no longer needed. Cc: stable@vger.kernel.org Fixes: 993161d ("crypto: qat - fix handling of VF to PF interrupts") Signed-off-by: Marco Chiappero Co-developed-by: Giovanni Cabiddu Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/qat/qat_common/adf_accel_devices.h | 2 +- drivers/crypto/qat/qat_common/adf_gen2_pfvf.c | 58 +++++++++----- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 44 ++++++++--- drivers/crypto/qat/qat_common/adf_isr.c | 17 ++--- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 76 +++++++++++++------ 5 files changed, 132 insertions(+), 65 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index a03c6cf72331..dfa7ee41c5e9 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -152,9 +152,9 @@ struct adf_pfvf_ops { int (*enable_comms)(struct adf_accel_dev *accel_dev); u32 (*get_pf2vf_offset)(u32 i); u32 (*get_vf2pf_offset)(u32 i); - u32 (*get_vf2pf_sources)(void __iomem *pmisc_addr); void (*enable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); void (*disable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); + u32 (*disable_pending_vf2pf_interrupts)(void __iomem *pmisc_addr); int (*send_msg)(struct adf_accel_dev *accel_dev, struct pfvf_message msg, u32 pfvf_offset, struct mutex *csr_lock); struct pfvf_message (*recv_msg)(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c index 1a9072aac2ca..def4cc8e1039 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c @@ -13,6 +13,7 @@ #include "adf_pfvf_utils.h" /* VF2PF interrupts */ +#define ADF_GEN2_VF_MSK 0xFFFF #define ADF_GEN2_ERR_REG_VF2PF(vf_src) (((vf_src) & 0x01FFFE00) >> 9) #define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & 0xFFFF) << 9) @@ -50,23 +51,6 @@ static u32 adf_gen2_vf_get_pfvf_offset(u32 i) return ADF_GEN2_VF_PF2VF_OFFSET; } -static u32 adf_gen2_get_vf2pf_sources(void __iomem *pmisc_addr) -{ - u32 errsou3, errmsk3, vf_int_mask; - - /* Get the interrupt sources triggered by VFs */ - errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); - vf_int_mask = ADF_GEN2_ERR_REG_VF2PF(errsou3); - - /* To avoid adding duplicate entries to work queue, clear - * vf_int_mask_sets bits that are already masked in ERRMSK register. - */ - errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); - vf_int_mask &= ~ADF_GEN2_ERR_REG_VF2PF(errmsk3); - - return vf_int_mask; -} - static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { @@ -89,6 +73,44 @@ static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr, } } +static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, disabled, pending; + u32 errsou3, errmsk3; + + /* Get the interrupt sources triggered by VFs */ + errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); + sources = ADF_GEN2_ERR_REG_VF2PF(errsou3); + + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); + disabled = ADF_GEN2_ERR_REG_VF2PF(errmsk3); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if ERRSOU3 changes just before writing to ERRMSK3. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. + */ + errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + + errmsk3 &= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static u32 gen2_csr_get_int_bit(enum gen2_csr_pos offset) { return ADF_PFVF_INT << offset; @@ -362,9 +384,9 @@ void adf_gen2_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->enable_comms = adf_enable_pf2vf_comms; pfvf_ops->get_pf2vf_offset = adf_gen2_pf_get_pfvf_offset; pfvf_ops->get_vf2pf_offset = adf_gen2_pf_get_pfvf_offset; - pfvf_ops->get_vf2pf_sources = adf_gen2_get_vf2pf_sources; pfvf_ops->enable_vf2pf_interrupts = adf_gen2_enable_vf2pf_interrupts; pfvf_ops->disable_vf2pf_interrupts = adf_gen2_disable_vf2pf_interrupts; + pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen2_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen2_pf2vf_send; pfvf_ops->recv_msg = adf_gen2_vf2pf_recv; } diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index f7860bf612da..4061725b926d 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -15,6 +15,7 @@ /* VF2PF interrupt source registers */ #define ADF_4XXX_VM2PF_SOU 0x41A180 #define ADF_4XXX_VM2PF_MSK 0x41A1C0 +#define ADF_GEN4_VF_MSK 0xFFFF #define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2 #define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F @@ -36,16 +37,6 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) return ADF_4XXX_VM2PF_OFFSET(i); } -static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr) -{ - u32 sou, mask; - - sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); - mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); - - return sou & ~mask; -} - static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { @@ -64,6 +55,37 @@ static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } +static u32 adf_gen4_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, disabled, pending; + + /* Get the interrupt sources triggered by VFs */ + sources = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + disabled = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if VM2PF_SOU changes just before writing to VM2PF_MSK. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. + */ + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, disabled | sources); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev, struct pfvf_message msg, u32 pfvf_offset, struct mutex *csr_lock) @@ -121,9 +143,9 @@ void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->enable_comms = adf_enable_pf2vf_comms; pfvf_ops->get_pf2vf_offset = adf_gen4_pf_get_pf2vf_offset; pfvf_ops->get_vf2pf_offset = adf_gen4_pf_get_vf2pf_offset; - pfvf_ops->get_vf2pf_sources = adf_gen4_get_vf2pf_sources; pfvf_ops->enable_vf2pf_interrupts = adf_gen4_enable_vf2pf_interrupts; pfvf_ops->disable_vf2pf_interrupts = adf_gen4_disable_vf2pf_interrupts; + pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen4_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen4_pfvf_send; pfvf_ops->recv_msg = adf_gen4_pfvf_recv; } diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index a35149f8bf1e..23f7fff32c64 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -76,32 +76,29 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); } -static void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, - u32 vf_mask) +static u32 adf_disable_pending_vf2pf_interrupts(struct adf_accel_dev *accel_dev) { void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); + u32 pending; spin_lock(&accel_dev->pf.vf2pf_ints_lock); - GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask); + pending = GET_PFVF_OPS(accel_dev)->disable_pending_vf2pf_interrupts(pmisc_addr); spin_unlock(&accel_dev->pf.vf2pf_ints_lock); + + return pending; } static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev) { - void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); bool irq_handled = false; unsigned long vf_mask; - /* Get the interrupt sources triggered by VFs */ - vf_mask = GET_PFVF_OPS(accel_dev)->get_vf2pf_sources(pmisc_addr); - + /* Get the interrupt sources triggered by VFs, except for those already disabled */ + vf_mask = adf_disable_pending_vf2pf_interrupts(accel_dev); if (vf_mask) { struct adf_accel_vf_info *vf_info; int i; - /* Disable VF2PF interrupts for VFs with pending ints */ - adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask); - /* * Handle VF2PF interrupt unless the VF is malicious and * is attempting to flood the host OS with VF2PF interrupts. diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 7375436ac1b8..86187671893c 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -7,6 +7,8 @@ #include "adf_dh895xcc_hw_data.h" #include "icp_qat_hw.h" +#define ADF_DH895XCC_VF_MSK 0xFFFFFFFF + /* Worker thread to service arbiter mappings */ static const u32 thrd_to_arb_map[ADF_DH895XCC_MAX_ACCELENGINES] = { 0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666, @@ -107,29 +109,6 @@ static const u32 *adf_get_arbiter_mapping(void) return thrd_to_arb_map; } -static u32 get_vf2pf_sources(void __iomem *pmisc_bar) -{ - u32 errsou3, errmsk3, errsou5, errmsk5, vf_int_mask; - - /* Get the interrupt sources triggered by VFs */ - errsou3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU3); - vf_int_mask = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3); - - /* To avoid adding duplicate entries to work queue, clear - * vf_int_mask_sets bits that are already masked in ERRMSK register. - */ - errmsk3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK3); - vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3); - - /* Do the same for ERRSOU5 */ - errsou5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU5); - errmsk5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK5); - vf_int_mask |= ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5); - vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5); - - return vf_int_mask; -} - static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { /* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */ @@ -143,7 +122,6 @@ static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) if (vf_mask >> 16) { u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5) & ~ADF_DH895XCC_ERR_MSK_VF2PF_U(vf_mask); - ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val); } } @@ -166,6 +144,54 @@ static void disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) } } +static u32 disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, pending, disabled; + u32 errsou3, errmsk3; + u32 errsou5, errmsk5; + + /* Get the interrupt sources triggered by VFs */ + errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); + errsou5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU5); + sources = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3) + | ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5); + + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); + errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5); + disabled = ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3) + | ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if sources changes just before writing to ERRMSK3 and + * ERRMSK5. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. + */ + errmsk3 |= ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK); + errmsk5 |= ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); + + errmsk3 &= ADF_DH895XCC_ERR_MSK_VF2PF_L(sources | disabled); + errmsk5 &= ADF_DH895XCC_ERR_MSK_VF2PF_U(sources | disabled); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) { adf_gen2_cfg_iov_thds(accel_dev, enable, @@ -213,9 +239,9 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->disable_iov = adf_disable_sriov; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); - hw_data->pfvf_ops.get_vf2pf_sources = get_vf2pf_sources; hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts; hw_data->pfvf_ops.disable_vf2pf_interrupts = disable_vf2pf_interrupts; + hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } From 8314ae8f5363ff8e7dece6d0eb884970a5530969 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:52 +0100 Subject: [PATCH 030/116] crypto: qat - leverage the GEN2 VF mask definiton Replace hard coded VF masks in adf_gen2_pfvf.c with the recently introduced ADF_GEN2_VF_MSK. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_gen2_pfvf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c index def4cc8e1039..8df952df18ef 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c @@ -15,7 +15,7 @@ /* VF2PF interrupts */ #define ADF_GEN2_VF_MSK 0xFFFF #define ADF_GEN2_ERR_REG_VF2PF(vf_src) (((vf_src) & 0x01FFFE00) >> 9) -#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & 0xFFFF) << 9) +#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & ADF_GEN2_VF_MSK) << 9) #define ADF_GEN2_PF_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04)) #define ADF_GEN2_VF_PF2VF_OFFSET 0x200 @@ -55,7 +55,7 @@ static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { /* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */ - if (vf_mask & 0xFFFF) { + if (vf_mask & ADF_GEN2_VF_MSK) { u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) & ~ADF_GEN2_ERR_MSK_VF2PF(vf_mask); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); @@ -66,7 +66,7 @@ static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { /* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */ - if (vf_mask & 0xFFFF) { + if (vf_mask & ADF_GEN2_VF_MSK) { u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) | ADF_GEN2_ERR_MSK_VF2PF(vf_mask); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); From e3e668fc77153591553a14c4077c619b2ab55974 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:53 +0100 Subject: [PATCH 031/116] crypto: qat - replace disable_vf2pf_interrupts() As a consequence of the refactored VF2PF interrupt handling logic, a function that disables specific VF2PF interrupts is no longer needed. Instead, a simpler function that disables all the interrupts, also hiding the device specific amount of VFs to be disabled from the pfvf_ops users, would be sufficient. This patch replaces disable_vf2pf_interrupts() with the new disable_all_vf2pf_interrupts(), which doesn't need any argument and disables all the VF2PF interrupts. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/qat/qat_common/adf_accel_devices.h | 2 +- .../crypto/qat/qat_common/adf_common_drv.h | 3 +-- drivers/crypto/qat/qat_common/adf_gen2_pfvf.c | 13 ++++------- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 10 +++----- drivers/crypto/qat/qat_common/adf_isr.c | 4 ++-- drivers/crypto/qat/qat_common/adf_sriov.c | 2 +- .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 23 ++++++++----------- 7 files changed, 23 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index dfa7ee41c5e9..e927799a8e6c 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -153,7 +153,7 @@ struct adf_pfvf_ops { u32 (*get_pf2vf_offset)(u32 i); u32 (*get_vf2pf_offset)(u32 i); void (*enable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); - void (*disable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); + void (*disable_all_vf2pf_interrupts)(void __iomem *pmisc_addr); u32 (*disable_pending_vf2pf_interrupts)(void __iomem *pmisc_addr); int (*send_msg)(struct adf_accel_dev *accel_dev, struct pfvf_message msg, u32 pfvf_offset, struct mutex *csr_lock); diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index feecf1035a90..da9d765834f0 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -195,10 +195,9 @@ bool adf_misc_wq_queue_work(struct work_struct *work); #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); -void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, - u32 vf_mask); void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); +void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u32 vf_nr); int adf_pf2vf_handle_pf_restarting(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c index 8df952df18ef..606409533409 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c @@ -62,15 +62,12 @@ static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, } } -static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr, - u32 vf_mask) +static void adf_gen2_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr) { /* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */ - if (vf_mask & ADF_GEN2_VF_MSK) { - u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) - | ADF_GEN2_ERR_MSK_VF2PF(vf_mask); - ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); - } + u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) + | ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); } static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) @@ -385,7 +382,7 @@ void adf_gen2_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->get_pf2vf_offset = adf_gen2_pf_get_pfvf_offset; pfvf_ops->get_vf2pf_offset = adf_gen2_pf_get_pfvf_offset; pfvf_ops->enable_vf2pf_interrupts = adf_gen2_enable_vf2pf_interrupts; - pfvf_ops->disable_vf2pf_interrupts = adf_gen2_disable_vf2pf_interrupts; + pfvf_ops->disable_all_vf2pf_interrupts = adf_gen2_disable_all_vf2pf_interrupts; pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen2_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen2_pf2vf_send; pfvf_ops->recv_msg = adf_gen2_vf2pf_recv; diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 4061725b926d..8091fc52e13a 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -46,13 +46,9 @@ static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } -static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, - u32 vf_mask) +static void adf_gen4_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr) { - unsigned int val; - - val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask; - ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK); } static u32 adf_gen4_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) @@ -144,7 +140,7 @@ void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->get_pf2vf_offset = adf_gen4_pf_get_pf2vf_offset; pfvf_ops->get_vf2pf_offset = adf_gen4_pf_get_vf2pf_offset; pfvf_ops->enable_vf2pf_interrupts = adf_gen4_enable_vf2pf_interrupts; - pfvf_ops->disable_vf2pf_interrupts = adf_gen4_disable_vf2pf_interrupts; + pfvf_ops->disable_all_vf2pf_interrupts = adf_gen4_disable_all_vf2pf_interrupts; pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen4_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen4_pfvf_send; pfvf_ops->recv_msg = adf_gen4_pfvf_recv; diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index 23f7fff32c64..ad9e135b8560 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -66,13 +66,13 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); } -void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) +void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev) { void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); unsigned long flags; spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags); - GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask); + GET_PFVF_OPS(accel_dev)->disable_all_vf2pf_interrupts(pmisc_addr); spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); } diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 8e8421a46b54..f38b2ffde146 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -106,7 +106,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) pci_disable_sriov(accel_to_pci_dev(accel_dev)); /* Disable VF to PF interrupts */ - adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0)); + adf_disable_all_vf2pf_interrupts(accel_dev); /* Clear Valid bits in AE Thread to PCIe Function Mapping */ if (hw_data->configure_iov_threads) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 86187671893c..cb3bdd3618fb 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -126,22 +126,19 @@ static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) } } -static void disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) +static void disable_all_vf2pf_interrupts(void __iomem *pmisc_addr) { + u32 val; + /* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */ - if (vf_mask & 0xFFFF) { - u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) - | ADF_DH895XCC_ERR_MSK_VF2PF_L(vf_mask); - ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3) + | ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val); /* Disable VF2PF interrupts for VFs 16 through 31 per vf_mask[31:16] */ - if (vf_mask >> 16) { - u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5) - | ADF_DH895XCC_ERR_MSK_VF2PF_U(vf_mask); - - ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5) + | ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val); } static u32 disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) @@ -240,7 +237,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts; - hw_data->pfvf_ops.disable_vf2pf_interrupts = disable_vf2pf_interrupts; + hw_data->pfvf_ops.disable_all_vf2pf_interrupts = disable_all_vf2pf_interrupts; hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } From fa374954836779a08cfb773ff20fe2083cb9d420 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:54 +0100 Subject: [PATCH 032/116] crypto: qat - use u32 variables in all GEN4 pfvf_ops Change adf_gen4_enable_vf2pf_interrupts() to use a u32 variable, consistently with both other GEN4 pfvf_ops and pfvf_ops of other generations. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 8091fc52e13a..73ec8defb2d3 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -40,7 +40,7 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - unsigned int val; + u32 val; val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask; ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); From ebd26229a7b343268c231008f295841ef45d0b96 Mon Sep 17 00:00:00 2001 From: Marco Chiappero Date: Thu, 7 Apr 2022 17:54:55 +0100 Subject: [PATCH 033/116] crypto: qat - remove line wrapping for pfvf_ops functions Remove unnecessary line wrapping for the adf_enable_vf2pf_interrupts() function, and harmonize pfvf_ops text. Signed-off-by: Marco Chiappero Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_common_drv.h | 3 +-- drivers/crypto/qat/qat_common/adf_gen2_pfvf.c | 3 +-- drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index da9d765834f0..0464fa257929 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -195,8 +195,7 @@ bool adf_misc_wq_queue_work(struct work_struct *work); #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); -void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, - u32 vf_mask); +void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u32 vf_nr); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c index 606409533409..70ef11963938 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c @@ -51,8 +51,7 @@ static u32 adf_gen2_vf_get_pfvf_offset(u32 i) return ADF_GEN2_VF_PF2VF_OFFSET; } -static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, - u32 vf_mask) +static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { /* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */ if (vf_mask & ADF_GEN2_VF_MSK) { diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 73ec8defb2d3..8e8efe93f3ee 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -37,8 +37,7 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) return ADF_4XXX_VM2PF_OFFSET(i); } -static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, - u32 vf_mask) +static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { u32 val; From 716a757c83ad6208a743dd8fb1577055d0867ee8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 8 Apr 2022 10:09:12 +0000 Subject: [PATCH 034/116] hwrng: mpfs - add polarfire soc hwrng support Add a driver to access the hardware random number generator on the Polarfire SoC. The hwrng can only be accessed via the system controller, so use the mailbox interface the system controller exposes to access the hwrng. Signed-off-by: Conor Dooley Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 13 ++++ drivers/char/hw_random/Makefile | 1 + drivers/char/hw_random/mpfs-rng.c | 104 ++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 drivers/char/hw_random/mpfs-rng.c diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index a087156a5818..c3a9f17bf31c 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -385,6 +385,19 @@ config HW_RANDOM_PIC32 If unsure, say Y. +config HW_RANDOM_POLARFIRE_SOC + tristate "Microchip PolarFire SoC Random Number Generator support" + depends on HW_RANDOM && POLARFIRE_SOC_SYS_CTRL + help + This driver provides kernel-side support for the Random Number + Generator hardware found on PolarFire SoC (MPFS). + + To compile this driver as a module, choose M here. The + module will be called mfps_rng. + + If unsure, say N. + + config HW_RANDOM_MESON tristate "Amlogic Meson Random Number Generator support" depends on HW_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 584d47ba32f7..3e948cf04476 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -46,3 +46,4 @@ obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o +obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c new file mode 100644 index 000000000000..5813da617a48 --- /dev/null +++ b/drivers/char/hw_random/mpfs-rng.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Microchip PolarFire SoC (MPFS) hardware random driver + * + * Copyright (c) 2020-2022 Microchip Corporation. All rights reserved. + * + * Author: Conor Dooley + */ + +#include +#include +#include +#include + +#define CMD_OPCODE 0x21 +#define CMD_DATA_SIZE 0U +#define CMD_DATA NULL +#define MBOX_OFFSET 0U +#define RESP_OFFSET 0U +#define RNG_RESP_BYTES 32U + +struct mpfs_rng { + struct mpfs_sys_controller *sys_controller; + struct hwrng rng; +}; + +static int mpfs_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct mpfs_rng *rng_priv = container_of(rng, struct mpfs_rng, rng); + u32 response_msg[RNG_RESP_BYTES / sizeof(u32)]; + unsigned int count = 0, copy_size_bytes; + int ret; + + struct mpfs_mss_response response = { + .resp_status = 0U, + .resp_msg = (u32 *)response_msg, + .resp_size = RNG_RESP_BYTES + }; + struct mpfs_mss_msg msg = { + .cmd_opcode = CMD_OPCODE, + .cmd_data_size = CMD_DATA_SIZE, + .response = &response, + .cmd_data = CMD_DATA, + .mbox_offset = MBOX_OFFSET, + .resp_offset = RESP_OFFSET + }; + + while (count < max) { + ret = mpfs_blocking_transaction(rng_priv->sys_controller, &msg); + if (ret) + return ret; + + copy_size_bytes = max - count > RNG_RESP_BYTES ? RNG_RESP_BYTES : max - count; + memcpy(buf + count, response_msg, copy_size_bytes); + + count += copy_size_bytes; + if (!wait) + break; + } + + return count; +} + +static int mpfs_rng_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mpfs_rng *rng_priv; + int ret; + + rng_priv = devm_kzalloc(dev, sizeof(*rng_priv), GFP_KERNEL); + if (!rng_priv) + return -ENOMEM; + + rng_priv->sys_controller = mpfs_sys_controller_get(&pdev->dev); + if (IS_ERR(rng_priv->sys_controller)) + return dev_err_probe(dev, PTR_ERR(rng_priv->sys_controller), + "Failed to register system controller hwrng sub device\n"); + + rng_priv->rng.read = mpfs_rng_read; + rng_priv->rng.name = pdev->name; + rng_priv->rng.quality = 1024; + + platform_set_drvdata(pdev, rng_priv); + + ret = devm_hwrng_register(&pdev->dev, &rng_priv->rng); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register MPFS hwrng\n"); + + dev_info(&pdev->dev, "Registered MPFS hwrng\n"); + + return 0; +} + +static struct platform_driver mpfs_rng_driver = { + .driver = { + .name = "mpfs-rng", + }, + .probe = mpfs_rng_probe, +}; +module_platform_driver(mpfs_rng_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Conor Dooley "); +MODULE_DESCRIPTION("PolarFire SoC (MPFS) hardware random driver"); From 10299073bc35ce051530fef318598b2f8b36e383 Mon Sep 17 00:00:00 2001 From: Yihao Han Date: Fri, 8 Apr 2022 07:23:49 -0700 Subject: [PATCH 035/116] crypto: ux500/hash - simplify if-if to if-else Replace `if (!req_ctx->updated)` with `else` for simplification and add curly brackets according to the kernel coding style: "Do not unnecessarily use braces where a single statement will do." ... "This does not apply if only one branch of a conditional statement is a single statement; in the latter case use braces in both branches" Please refer to: https://www.kernel.org/doc/html/v5.17-rc8/process/coding-style.html Signed-off-by: Yihao Han Signed-off-by: Herbert Xu --- drivers/crypto/ux500/hash/hash_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 5157c118d642..265ef3e96fdd 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -877,9 +877,7 @@ static int hash_dma_final(struct ahash_request *req) __func__); goto out; } - } - - if (!req_ctx->updated) { + } else { ret = hash_setconfiguration(device_data, &ctx->config); if (ret) { dev_err(device_data->dev, From f1724d397c60d296c0805c95a46ae7fc7163b70c Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:18 +0800 Subject: [PATCH 036/116] crypto: hisilicon/qm - add register checking for ACC Add register detection function to accelerator. Provided a tool that user can checking differential register through Debugfs. e.g. cd /sys/kernel/debug/hisi_zip//zip_dfx cat diff_regs Signed-off-by: Longfang Liu Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 182 +++++++++++++++++++++++++++++++++- include/linux/hisi_acc_qm.h | 14 +++ 2 files changed, 195 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c5c507f2d779..5e0695a8900c 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -253,7 +253,15 @@ #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 #define QM_QOS_VAL_MAX_LEN 32 - +#define QM_DFX_BASE 0x0100000 +#define QM_DFX_STATE1 0x0104000 +#define QM_DFX_STATE2 0x01040C8 +#define QM_DFX_COMMON 0x0000 +#define QM_DFX_BASE_LEN 0x5A +#define QM_DFX_STATE1_LEN 0x2E +#define QM_DFX_STATE2_LEN 0x11 +#define QM_DFX_COMMON_LEN 0xC3 +#define QM_DFX_REGS_LEN 4UL #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ @@ -467,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = { { /* sentinel */ } }; +/* define the QM's dfx regs region and region length */ +static struct dfx_diff_registers qm_diff_regs[] = { + { + .reg_offset = QM_DFX_BASE, + .reg_len = QM_DFX_BASE_LEN, + }, { + .reg_offset = QM_DFX_STATE1, + .reg_len = QM_DFX_STATE1_LEN, + }, { + .reg_offset = QM_DFX_STATE2, + .reg_len = QM_DFX_STATE2_LEN, + }, { + .reg_offset = QM_DFX_COMMON, + .reg_len = QM_DFX_COMMON_LEN, + }, +}; + static const char * const qm_db_timeout[] = { "sq", "cq", "eq", "aeq", }; @@ -1625,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused) DEFINE_SHOW_ATTRIBUTE(qm_regs); +static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, + const struct dfx_diff_registers *cregs, int reg_len) +{ + struct dfx_diff_registers *diff_regs; + u32 j, base_offset; + int i; + + diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); + if (!diff_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < reg_len; i++) { + if (!cregs[i].reg_len) + continue; + + diff_regs[i].reg_offset = cregs[i].reg_offset; + diff_regs[i].reg_len = cregs[i].reg_len; + diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, + GFP_KERNEL); + if (!diff_regs[i].regs) + goto alloc_error; + + for (j = 0; j < diff_regs[i].reg_len; j++) { + base_offset = diff_regs[i].reg_offset + + j * QM_DFX_REGS_LEN; + diff_regs[i].regs[j] = readl(qm->io_base + base_offset); + } + } + + return diff_regs; + +alloc_error: + while (i > 0) { + i--; + kfree(diff_regs[i].regs); + } + kfree(diff_regs); + return ERR_PTR(-ENOMEM); +} + +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); + dregs = NULL; +} + +/** + * hisi_qm_diff_regs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length. + */ +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + if (!qm || !dregs || reg_len <= 0) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + if (IS_ERR(qm->debug.qm_diff_regs)) + return PTR_ERR(qm->debug.qm_diff_regs); + + qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); + if (IS_ERR(qm->debug.acc_diff_regs)) { + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + return PTR_ERR(qm->debug.acc_diff_regs); + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init); + +/** + * hisi_qm_diff_regs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len) +{ + if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF) + return; + + dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); +} +EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); + +/** + * hisi_qm_acc_diff_regs_dump() - Dump registers's value. + * @qm: device qm handle. + * @s: Debugfs file handle. + * @dregs: diff registers handle. + * @regs_len: diff registers region length. + */ +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len) +{ + u32 j, val, base_offset; + int i, ret; + + if (!qm || !s || !dregs || regs_len <= 0) + return; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + down_read(&qm->qps_lock); + for (i = 0; i < regs_len; i++) { + if (!dregs[i].reg_len) + continue; + + for (j = 0; j < dregs[i].reg_len; j++) { + base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; + val = readl(qm->io_base + base_offset); + if (val != dregs[i].regs[j]) + seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", + base_offset, dregs[i].regs[j], val); + } + } + up_read(&qm->qps_lock); + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); + +static int qm_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); + static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, size_t count, loff_t *pos) { @@ -4484,6 +4659,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) */ void hisi_qm_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; @@ -4499,6 +4675,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm) qm_create_debugfs_file(qm, qm->debug.qm_d, i); } + if (qm_regs) + debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, + qm, &qm_diff_regs_fops); + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 177f7b7cd414..39acc0316a60 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -168,6 +168,12 @@ enum qm_vf_state { QM_NOT_READY, }; +struct dfx_diff_registers { + u32 *regs; + u32 reg_offset; + u32 reg_len; +}; + struct qm_dfx { atomic64_t err_irq_cnt; atomic64_t aeq_irq_cnt; @@ -190,6 +196,8 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + struct dfx_diff_registers *qm_diff_regs; + struct dfx_diff_registers *acc_diff_regs; }; struct qm_shaper_factor { @@ -448,6 +456,12 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +int hisi_qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len); +void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len); +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, int regs_len); + pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, pci_channel_state_t state); pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev); From 9210bdaa0d49d271fcff901d47ecadfaf1786a76 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:19 +0800 Subject: [PATCH 037/116] crypto: hisilicon/hpre - support register checking The value of the register is changed after the task running. A debugfs file node is added to help users to check the change of register values. Signed-off-by: Longfang Liu Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 90 ++++++++++++++++++----- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 36ab30e9e654..d3f73e38709a 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -107,6 +107,15 @@ #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 +#define HPRE_DFX_BASE 0x301000 +#define HPRE_DFX_COMMON1 0x301400 +#define HPRE_DFX_COMMON2 0x301A00 +#define HPRE_DFX_CORE 0x302000 +#define HPRE_DFX_BASE_LEN 0x55 +#define HPRE_DFX_COMMON1_LEN 0x41 +#define HPRE_DFX_COMMON2_LEN 0xE +#define HPRE_DFX_CORE_LEN 0x43 + static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { @@ -226,6 +235,53 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = { "invalid_req_cnt" }; +/* define the HPRE's dfx regs region and region length */ +static struct dfx_diff_registers hpre_diff_regs[] = { + { + .reg_offset = HPRE_DFX_BASE, + .reg_len = HPRE_DFX_BASE_LEN, + }, { + .reg_offset = HPRE_DFX_COMMON1, + .reg_len = HPRE_DFX_COMMON1_LEN, + }, { + .reg_offset = HPRE_DFX_COMMON2, + .reg_len = HPRE_DFX_COMMON2_LEN, + }, { + .reg_offset = HPRE_DFX_CORE, + .reg_len = HPRE_DFX_CORE_LEN, + }, +}; + +static int hpre_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(hpre_diff_regs)); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_diff_regs); + +static int hpre_com_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); + +static int hpre_cluster_regs_show(struct seq_file *s, void *unused) +{ + hisi_qm_regs_dump(s, s->private); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); + static const struct kernel_param_ops hpre_uacce_mode_ops = { .set = uacce_mode_set, .get = param_get_int, @@ -779,24 +835,6 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get, hpre_debugfs_atomic64_set, "%llu\n"); -static int hpre_com_regs_show(struct seq_file *s, void *unused) -{ - hisi_qm_regs_dump(s, s->private); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(hpre_com_regs); - -static int hpre_cluster_regs_show(struct seq_file *s, void *unused) -{ - hisi_qm_regs_dump(s, s->private); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs); - static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, enum hpre_ctrl_dbgfs_file type, int indx) { @@ -895,6 +933,7 @@ static int hpre_ctrl_debug_init(struct hisi_qm *qm) static void hpre_dfx_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *hpre_regs = qm->debug.acc_diff_regs; struct hpre *hpre = container_of(qm, struct hpre, qm); struct hpre_dfx *dfx = hpre->debug.dfx; struct dentry *parent; @@ -906,6 +945,10 @@ static void hpre_dfx_debug_init(struct hisi_qm *qm) debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i], &hpre_atomic64_ops); } + + if (qm->fun_type == QM_HW_PF && hpre_regs) + debugfs_create_file("diff_regs", 0444, parent, + qm, &hpre_diff_regs_fops); } static int hpre_debugfs_init(struct hisi_qm *qm) @@ -918,6 +961,13 @@ static int hpre_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; + ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs, + ARRAY_SIZE(hpre_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init HPRE diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) { @@ -931,12 +981,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); +debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); return ret; } static void hpre_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); } From 16175030bb5b53ab125480af6cfb73ae8064d780 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:20 +0800 Subject: [PATCH 038/116] crypto: hisilicon/sec - support register checking The value of the register is changed after the task running. A debugfs file node is added to help users to check the change of register values. Signed-off-by: Longfang Liu Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 53 ++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 92fae706bdb2..93ef0e3b5b16 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -110,6 +110,15 @@ #define SEC_SQE_MASK_LEN 48 #define SEC_SHAPER_TYPE_RATE 400 +#define SEC_DFX_BASE 0x301000 +#define SEC_DFX_CORE 0x302100 +#define SEC_DFX_COMMON1 0x301600 +#define SEC_DFX_COMMON2 0x301C00 +#define SEC_DFX_BASE_LEN 0x9D +#define SEC_DFX_CORE_LEN 0x32B +#define SEC_DFX_COMMON1_LEN 0x45 +#define SEC_DFX_COMMON2_LEN 0xBA + struct sec_hw_error { u32 int_msk; const char *msg; @@ -226,6 +235,34 @@ static const struct debugfs_reg32 sec_dfx_regs[] = { {"SEC_BD_SAA8 ", 0x301C40}, }; +/* define the SEC's dfx regs region and region length */ +static struct dfx_diff_registers sec_diff_regs[] = { + { + .reg_offset = SEC_DFX_BASE, + .reg_len = SEC_DFX_BASE_LEN, + }, { + .reg_offset = SEC_DFX_COMMON1, + .reg_len = SEC_DFX_COMMON1_LEN, + }, { + .reg_offset = SEC_DFX_COMMON2, + .reg_len = SEC_DFX_COMMON2_LEN, + }, { + .reg_offset = SEC_DFX_CORE, + .reg_len = SEC_DFX_CORE_LEN, + }, +}; + +static int sec_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(sec_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(sec_diff_regs); + static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp) { return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF); @@ -729,6 +766,7 @@ DEFINE_SHOW_ATTRIBUTE(sec_regs); static int sec_core_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *sec_regs = qm->debug.acc_diff_regs; struct sec_dev *sec = container_of(qm, struct sec_dev, qm); struct device *dev = &qm->pdev->dev; struct sec_dfx *dfx = &sec->debug.dfx; @@ -749,6 +787,9 @@ static int sec_core_debug_init(struct hisi_qm *qm) if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops); + if (qm->fun_type == QM_HW_PF && sec_regs) + debugfs_create_file("diff_regs", 0444, tmp_d, + qm, &sec_diff_regs_fops); for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) { atomic64_t *data = (atomic64_t *)((uintptr_t)dfx + @@ -790,6 +831,14 @@ static int sec_debugfs_init(struct hisi_qm *qm) sec_debugfs_root); qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; + + ret = hisi_qm_diff_regs_init(qm, sec_diff_regs, + ARRAY_SIZE(sec_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init SEC diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); @@ -799,12 +848,16 @@ static int sec_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); +debugfs_remove: debugfs_remove_recursive(sec_debugfs_root); return ret; } static void sec_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); } From 9b0c97dfc215b87208a699870881a69e762301ca Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:21 +0800 Subject: [PATCH 039/116] crypto: hisilicon/zip - support register checking The value of the register is changed after the task running. A debugfs file node is added to help users to check the change of register values. Signed-off-by: Longfang Liu Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 78 ++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4534e1e107d1..2d5663d8d87f 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -49,14 +49,18 @@ #define HZIP_QM_IDEL_STATUS 0x3040e4 -#define HZIP_CORE_DEBUG_COMP_0 0x302000 -#define HZIP_CORE_DEBUG_COMP_1 0x303000 -#define HZIP_CORE_DEBUG_DECOMP_0 0x304000 -#define HZIP_CORE_DEBUG_DECOMP_1 0x305000 -#define HZIP_CORE_DEBUG_DECOMP_2 0x306000 -#define HZIP_CORE_DEBUG_DECOMP_3 0x307000 -#define HZIP_CORE_DEBUG_DECOMP_4 0x308000 -#define HZIP_CORE_DEBUG_DECOMP_5 0x309000 +#define HZIP_CORE_DFX_BASE 0x301000 +#define HZIP_CLOCK_GATED_CONTL 0X301004 +#define HZIP_CORE_DFX_COMP_0 0x302000 +#define HZIP_CORE_DFX_COMP_1 0x303000 +#define HZIP_CORE_DFX_DECOMP_0 0x304000 +#define HZIP_CORE_DFX_DECOMP_1 0x305000 +#define HZIP_CORE_DFX_DECOMP_2 0x306000 +#define HZIP_CORE_DFX_DECOMP_3 0x307000 +#define HZIP_CORE_DFX_DECOMP_4 0x308000 +#define HZIP_CORE_DFX_DECOMP_5 0x309000 +#define HZIP_CORE_REGS_BASE_LEN 0xB0 +#define HZIP_CORE_REGS_DFX_LEN 0x28 #define HZIP_CORE_INT_SOURCE 0x3010A0 #define HZIP_CORE_INT_MASK_REG 0x3010A4 @@ -230,6 +234,48 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = { {"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull}, }; +/* define the ZIP's dfx regs region and region length */ +static struct dfx_diff_registers hzip_diff_regs[] = { + { + .reg_offset = HZIP_CORE_DFX_BASE, + .reg_len = HZIP_CORE_REGS_BASE_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_COMP_0, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_COMP_1, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_0, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_1, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_2, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_3, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_4, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, { + .reg_offset = HZIP_CORE_DFX_DECOMP_5, + .reg_len = HZIP_CORE_REGS_DFX_LEN, + }, +}; + +static int hzip_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs, + ARRAY_SIZE(hzip_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(hzip_diff_regs); static const struct kernel_param_ops zip_uacce_mode_ops = { .set = uacce_mode_set, .get = param_get_int, @@ -621,6 +667,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm) static void hisi_zip_dfx_debug_init(struct hisi_qm *qm) { + struct dfx_diff_registers *hzip_regs = qm->debug.acc_diff_regs; struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm); struct hisi_zip_dfx *dfx = &zip->dfx; struct dentry *tmp_dir; @@ -634,6 +681,10 @@ static void hisi_zip_dfx_debug_init(struct hisi_qm *qm) 0644, tmp_dir, data, &zip_atomic64_ops); } + + if (qm->fun_type == QM_HW_PF && hzip_regs) + debugfs_create_file("diff_regs", 0444, tmp_dir, + qm, &hzip_diff_regs_fops); } static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm) @@ -666,6 +717,13 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; qm->debug.debug_root = dev_d; + ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs, + ARRAY_SIZE(hzip_diff_regs)); + if (ret) { + dev_warn(dev, "Failed to init ZIP diff regs!\n"); + goto debugfs_remove; + } + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { @@ -679,6 +737,8 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); +debugfs_remove: debugfs_remove_recursive(hzip_debugfs_root); return ret; } @@ -703,6 +763,8 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { + hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + debugfs_remove_recursive(qm->debug.debug_root); if (qm->fun_type == QM_HW_PF) { From a7dbdfda0c42bda6088d02da15c4e56f5094426a Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:22 +0800 Subject: [PATCH 040/116] Documentation: update debugfs doc for Hisilicon HPRE Update documentation describing DebugFS that could help to check the change of register values. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-hpre | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre index 396de7bc735d..82abf92df429 100644 --- a/Documentation/ABI/testing/debugfs-hisi-hpre +++ b/Documentation/ABI/testing/debugfs-hisi-hpre @@ -104,6 +104,20 @@ Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on HPRE. +What: /sys/kernel/debug/hisi_hpre//qm/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(regs) read hardware register value. This + node is used to show the change of the qm register values. This + node can be help users to check the change of register values. + +What: /sys/kernel/debug/hisi_hpre//hpre_dfx/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: HPRE debug registers(regs) read hardware register value. This + node is used to show the change of the register values. This + node can be help users to check the change of register values. + What: /sys/kernel/debug/hisi_hpre//hpre_dfx/send_cnt Date: Apr 2020 Contact: linux-crypto@vger.kernel.org From 73e3b46e90a34657bc31fa4886f21800041fd397 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:23 +0800 Subject: [PATCH 041/116] Documentation: update debugfs doc for Hisilicon SEC Update documentation describing DebugFS that could help to check the change of register values. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-sec | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec index 2bf84ced484b..93c530d1bf0f 100644 --- a/Documentation/ABI/testing/debugfs-hisi-sec +++ b/Documentation/ABI/testing/debugfs-hisi-sec @@ -84,6 +84,20 @@ Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on SEC. +What: /sys/kernel/debug/hisi_sec2//qm/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(regs) read hardware register value. This + node is used to show the change of the qm register values. This + node can be help users to check the change of register values. + +What: /sys/kernel/debug/hisi_sec2//sec_dfx/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: SEC debug registers(regs) read hardware register value. This + node is used to show the change of the register values. This + node can be help users to check the change of register values. + What: /sys/kernel/debug/hisi_sec2//sec_dfx/send_cnt Date: Apr 2020 Contact: linux-crypto@vger.kernel.org From 30169c5b550a71a47b0aecd9c254dc3a1d5bf4a7 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:24 +0800 Subject: [PATCH 042/116] Documentation: update debugfs doc for Hisilicon ZIP Update documentation describing DebugFS that could help to check the change of register values. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-hisi-zip | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip index bf1258bc6495..fd3f314cf8d1 100644 --- a/Documentation/ABI/testing/debugfs-hisi-zip +++ b/Documentation/ABI/testing/debugfs-hisi-zip @@ -97,6 +97,20 @@ Description: Dump the status of the QM. Four states: initiated, started, stopped and closed. Available for both PF and VF, and take no other effect on ZIP. +What: /sys/kernel/debug/hisi_zip//qm/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(regs) read hardware register value. This + node is used to show the change of the qm registers value. This + node can be help users to check the change of register values. + +What: /sys/kernel/debug/hisi_zip//zip_dfx/diff_regs +Date: Mar 2022 +Contact: linux-crypto@vger.kernel.org +Description: ZIP debug registers(regs) read hardware register value. This + node is used to show the change of the registers value. this + node can be help users to check the change of register values. + What: /sys/kernel/debug/hisi_zip//zip_dfx/send_cnt Date: Apr 2020 Contact: linux-crypto@vger.kernel.org From a888ccd6c66683a49977ba6a2b91fe52fbec9367 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:25 +0800 Subject: [PATCH 043/116] crypto: hisilicon/qm - add last word dumping for ACC Add last word dumping function during acc engines controller reset. The last words are reported to the printed information during the reset. The dmesg information included qm debugging registers and engine debugging registers. It can help to improve debugging capability. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 57 +++++++++++++++++++++++++++++++++++ include/linux/hisi_acc_qm.h | 4 +++ 2 files changed, 61 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 5e0695a8900c..7f5c3186a895 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3673,6 +3673,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state) writel(state, qm->io_base + QM_VF_STATE); } +static void qm_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + /** * hisi_qm_uninit() - Uninitialize qm. * @qm: The qm needed uninit. @@ -3684,6 +3695,8 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + qm_last_regs_uninit(qm); + qm_cmd_uninit(qm); kfree(qm->factor); down_write(&qm->qps_lock); @@ -5361,6 +5374,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return 0; } +static void qm_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); + if (debug->qm_last_words[i] != val) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + qm_dfx_regs[i].name, debug->qm_last_words[i], val); + } +} + static int qm_controller_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5376,6 +5407,10 @@ static int qm_controller_reset(struct hisi_qm *qm) return ret; } + qm_show_last_dfx_regs(qm); + if (qm->err_ini->show_last_dfx_regs) + qm->err_ini->show_last_dfx_regs(qm); + ret = qm_soft_reset(qm); if (ret) { pci_err(pdev, "Controller reset failed (%d)\n", ret); @@ -6086,6 +6121,26 @@ static int hisi_qm_memory_init(struct hisi_qm *qm) return ret; } +static void qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), + GFP_KERNEL); + if (!debug->qm_last_words) + return; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } +} + /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. @@ -6138,6 +6193,8 @@ int hisi_qm_init(struct hisi_qm *qm) qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); + qm_last_regs_init(qm); + return 0; err_alloc_uacce: diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 39acc0316a60..e5522eaf88fd 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -196,6 +196,9 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + unsigned int *qm_last_words; + /* ACC engines recoreding last regs */ + unsigned int *last_words; struct dfx_diff_registers *qm_diff_regs; struct dfx_diff_registers *acc_diff_regs; }; @@ -251,6 +254,7 @@ struct hisi_qm_err_ini { void (*open_sva_prefetch)(struct hisi_qm *qm); void (*close_sva_prefetch)(struct hisi_qm *qm); void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); + void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); }; From 8a88d0914529f3558bb160cb862ef4daafebb7b4 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:26 +0800 Subject: [PATCH 044/116] crypto: hisilicon/sec - support last word dumping Add last word dumping function during sec engine controller reset. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 55 +++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 93ef0e3b5b16..4d85d2cbf376 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -861,6 +861,53 @@ static void sec_debugfs_exit(struct hisi_qm *qm) debugfs_remove_recursive(qm->debug.debug_root); } +static int sec_show_last_regs_init(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + int i; + + debug->last_words = kcalloc(ARRAY_SIZE(sec_dfx_regs), + sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) + debug->last_words[i] = readl_relaxed(qm->io_base + + sec_dfx_regs[i].offset); + + return 0; +} + +static void sec_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void sec_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + /* dumps last word of the debugging registers during controller reset */ + for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + sec_dfx_regs[i].offset); + if (val != debug->last_words[i]) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + sec_dfx_regs[i].name, debug->last_words[i], val); + } +} + static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct sec_hw_error *errs = sec_hw_errors; @@ -927,6 +974,7 @@ static const struct hisi_qm_err_ini sec_err_ini = { .open_axi_master_ooo = sec_open_axi_master_ooo, .open_sva_prefetch = sec_open_sva_prefetch, .close_sva_prefetch = sec_close_sva_prefetch, + .show_last_dfx_regs = sec_show_last_dfx_regs, .err_info_init = sec_err_info_init, }; @@ -945,8 +993,11 @@ static int sec_pf_probe_init(struct sec_dev *sec) sec_open_sva_prefetch(qm); hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); + ret = sec_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); - return 0; + return ret; } static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) @@ -1120,6 +1171,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); err_probe_uninit: + sec_show_last_regs_uninit(qm); sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -1144,6 +1196,7 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); + sec_show_last_regs_uninit(qm); sec_probe_uninit(qm); From 42123e81fdba56ce5712a1f5d39a600c4d44ddad Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:27 +0800 Subject: [PATCH 045/116] crypto: hisilicon/hpre - support last word dumping 1. Add some debugging registers. 2. Add last word dumping function during hpre engine controller reset. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 132 ++++++++++++++++++---- 1 file changed, 112 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index d3f73e38709a..9d529df0eab9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -36,6 +36,12 @@ #define HPRE_DATA_WUSER_CFG 0x301040 #define HPRE_INT_MASK 0x301400 #define HPRE_INT_STATUS 0x301800 +#define HPRE_HAC_INT_MSK 0x301400 +#define HPRE_HAC_RAS_CE_ENB 0x301410 +#define HPRE_HAC_RAS_NFE_ENB 0x301414 +#define HPRE_HAC_RAS_FE_ENB 0x301418 +#define HPRE_HAC_INT_SET 0x301500 +#define HPRE_RNG_TIMEOUT_NUM 0x301A34 #define HPRE_CORE_INT_ENABLE 0 #define HPRE_CORE_INT_DISABLE GENMASK(21, 0) #define HPRE_RDCHN_INI_ST 0x301a00 @@ -201,28 +207,32 @@ static const u64 hpre_cluster_offsets[] = { }; static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = { - {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET}, - {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET}, - {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET}, - {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET}, - {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET}, + {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET}, + {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET}, + {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET}, + {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET}, + {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET}, }; static const struct debugfs_reg32 hpre_com_dfx_regs[] = { - {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE}, - {"AXQOS ", HPRE_VFG_AXQOS}, - {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG}, - {"QM_ARUSR_MCFG1 ", QM_ARUSER_M_CFG_1}, - {"QM_AWUSR_MCFG1 ", QM_AWUSER_M_CFG_1}, - {"BD_ENDIAN ", HPRE_BD_ENDIAN}, - {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS}, - {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG}, - {"POISON_BYPASS ", HPRE_POISON_BYPASS}, - {"BD_ARUSER ", HPRE_BD_ARUSR_CFG}, - {"BD_AWUSER ", HPRE_BD_AWUSR_CFG}, - {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG}, - {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG}, - {"INT_STATUS ", HPRE_INT_STATUS}, + {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE}, + {"AXQOS ", HPRE_VFG_AXQOS}, + {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG}, + {"BD_ENDIAN ", HPRE_BD_ENDIAN}, + {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS}, + {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG}, + {"POISON_BYPASS ", HPRE_POISON_BYPASS}, + {"BD_ARUSER ", HPRE_BD_ARUSR_CFG}, + {"BD_AWUSER ", HPRE_BD_AWUSR_CFG}, + {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG}, + {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG}, + {"INT_STATUS ", HPRE_INT_STATUS}, + {"INT_MASK ", HPRE_HAC_INT_MSK}, + {"RAS_CE_ENB ", HPRE_HAC_RAS_CE_ENB}, + {"RAS_NFE_ENB ", HPRE_HAC_RAS_NFE_ENB}, + {"RAS_FE_ENB ", HPRE_HAC_RAS_FE_ENB}, + {"INT_SET ", HPRE_HAC_INT_SET}, + {"RNG_TIMEOUT_NUM ", HPRE_RNG_TIMEOUT_NUM}, }; static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = { @@ -1023,6 +1033,82 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) return hisi_qm_init(qm); } +static int hpre_show_last_regs_init(struct hisi_qm *qm) +{ + int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs); + u8 clusters_num = hpre_cluster_num(qm); + struct qm_debug *debug = &qm->debug; + void __iomem *io_base; + int i, j, idx; + + debug->last_words = kcalloc(cluster_dfx_regs_num * clusters_num + + com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < com_dfx_regs_num; i++) + debug->last_words[i] = readl_relaxed(qm->io_base + + hpre_com_dfx_regs[i].offset); + + for (i = 0; i < clusters_num; i++) { + io_base = qm->io_base + hpre_cluster_offsets[i]; + for (j = 0; j < cluster_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j; + debug->last_words[idx] = readl_relaxed( + io_base + hpre_cluster_dfx_regs[j].offset); + } + } + + return 0; +} + +static void hpre_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void hpre_show_last_dfx_regs(struct hisi_qm *qm) +{ + int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs); + u8 clusters_num = hpre_cluster_num(qm); + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + void __iomem *io_base; + int i, j, idx; + u32 val; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + /* dumps last word of the debugging registers during controller reset */ + for (i = 0; i < com_dfx_regs_num; i++) { + val = readl_relaxed(qm->io_base + hpre_com_dfx_regs[i].offset); + if (debug->last_words[i] != val) + pci_info(pdev, "Common_core:%s \t= 0x%08x => 0x%08x\n", + hpre_com_dfx_regs[i].name, debug->last_words[i], val); + } + + for (i = 0; i < clusters_num; i++) { + io_base = qm->io_base + hpre_cluster_offsets[i]; + for (j = 0; j < cluster_dfx_regs_num; j++) { + val = readl_relaxed(io_base + + hpre_cluster_dfx_regs[j].offset); + idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j; + if (debug->last_words[idx] != val) + pci_info(pdev, "cluster-%d:%s \t= 0x%08x => 0x%08x\n", + i, hpre_cluster_dfx_regs[j].name, debug->last_words[idx], val); + } + } +} + static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct hpre_hw_error *err = hpre_hw_errors; @@ -1081,6 +1167,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .open_axi_master_ooo = hpre_open_axi_master_ooo, .open_sva_prefetch = hpre_open_sva_prefetch, .close_sva_prefetch = hpre_close_sva_prefetch, + .show_last_dfx_regs = hpre_show_last_dfx_regs, .err_info_init = hpre_err_info_init, }; @@ -1098,8 +1185,11 @@ static int hpre_pf_probe_init(struct hpre *hpre) qm->err_ini = &hpre_err_ini; qm->err_ini->err_info_init(qm); hisi_qm_dev_err_init(qm); + ret = hpre_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); - return 0; + return ret; } static int hpre_probe_init(struct hpre *hpre) @@ -1185,6 +1275,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_qm_stop(qm, QM_NORMAL); err_with_err_init: + hpre_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); err_with_qm_init: @@ -1215,6 +1306,7 @@ static void hpre_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) { hpre_cnt_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; + hpre_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); } From 5bfabd50c6fa7fe817e492091fa9428a9202a045 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 9 Apr 2022 16:03:28 +0800 Subject: [PATCH 046/116] crypto: hisilicon/zip - support last word dumping 1. Add some debugging registers. 2. Add last word dumping function during zip engine controller reset. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 107 +++++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 2d5663d8d87f..9c925e9c0a2d 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -234,6 +234,22 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = { {"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull}, }; +static const struct debugfs_reg32 hzip_com_dfx_regs[] = { + {"HZIP_CLOCK_GATE_CTRL ", 0x301004}, + {"HZIP_CORE_INT_RAS_CE_ENB ", 0x301160}, + {"HZIP_CORE_INT_RAS_NFE_ENB ", 0x301164}, + {"HZIP_CORE_INT_RAS_FE_ENB ", 0x301168}, + {"HZIP_UNCOM_ERR_RAS_CTRL ", 0x30116C}, +}; + +static const struct debugfs_reg32 hzip_dump_dfx_regs[] = { + {"HZIP_GET_BD_NUM ", 0x00ull}, + {"HZIP_GET_RIGHT_BD ", 0x04ull}, + {"HZIP_GET_ERROR_BD ", 0x08ull}, + {"HZIP_DONE_BD_NUM ", 0x0cull}, + {"HZIP_MAX_DELAY ", 0x20ull}, +}; + /* define the ZIP's dfx regs region and region length */ static struct dfx_diff_registers hzip_diff_regs[] = { { @@ -773,6 +789,87 @@ static void hisi_zip_debugfs_exit(struct hisi_qm *qm) } } +static int hisi_zip_show_last_regs_init(struct hisi_qm *qm) +{ + int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs); + struct qm_debug *debug = &qm->debug; + void __iomem *io_base; + int i, j, idx; + + debug->last_words = kcalloc(core_dfx_regs_num * HZIP_CORE_NUM + + com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->last_words) + return -ENOMEM; + + for (i = 0; i < com_dfx_regs_num; i++) { + io_base = qm->io_base + hzip_com_dfx_regs[i].offset; + debug->last_words[i] = readl_relaxed(io_base); + } + + for (i = 0; i < HZIP_CORE_NUM; i++) { + io_base = qm->io_base + core_offsets[i]; + for (j = 0; j < core_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * core_dfx_regs_num + j; + debug->last_words[idx] = readl_relaxed( + io_base + hzip_dump_dfx_regs[j].offset); + } + } + + return 0; +} + +static void hisi_zip_show_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + kfree(debug->last_words); + debug->last_words = NULL; +} + +static void hisi_zip_show_last_dfx_regs(struct hisi_qm *qm) +{ + int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs); + int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs); + struct qm_debug *debug = &qm->debug; + char buf[HZIP_BUF_SIZE]; + void __iomem *base; + int i, j, idx; + u32 val; + + if (qm->fun_type == QM_HW_VF || !debug->last_words) + return; + + for (i = 0; i < com_dfx_regs_num; i++) { + val = readl_relaxed(qm->io_base + hzip_com_dfx_regs[i].offset); + if (debug->last_words[i] != val) + pci_info(qm->pdev, "com_dfx: %s \t= 0x%08x => 0x%08x\n", + hzip_com_dfx_regs[i].name, debug->last_words[i], val); + } + + for (i = 0; i < HZIP_CORE_NUM; i++) { + if (i < HZIP_COMP_CORE_NUM) + scnprintf(buf, sizeof(buf), "Comp_core-%d", i); + else + scnprintf(buf, sizeof(buf), "Decomp_core-%d", + i - HZIP_COMP_CORE_NUM); + base = qm->io_base + core_offsets[i]; + + pci_info(qm->pdev, "==>%s:\n", buf); + /* dump last word for dfx regs during control resetting */ + for (j = 0; j < core_dfx_regs_num; j++) { + idx = com_dfx_regs_num + i * core_dfx_regs_num + j; + val = readl_relaxed(base + hzip_dump_dfx_regs[j].offset); + if (debug->last_words[idx] != val) + pci_info(qm->pdev, "%s \t= 0x%08x => 0x%08x\n", + hzip_dump_dfx_regs[j].name, debug->last_words[idx], val); + } + } +} + static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) { const struct hisi_zip_hw_error *err = zip_hw_error; @@ -860,6 +957,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { .close_axi_master_ooo = hisi_zip_close_axi_master_ooo, .open_sva_prefetch = hisi_zip_open_sva_prefetch, .close_sva_prefetch = hisi_zip_close_sva_prefetch, + .show_last_dfx_regs = hisi_zip_show_last_dfx_regs, .err_info_init = hisi_zip_err_info_init, }; @@ -867,6 +965,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { struct hisi_qm *qm = &hisi_zip->qm; struct hisi_zip_ctrl *ctrl; + int ret; ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -882,7 +981,11 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(qm); - return 0; + ret = hisi_zip_show_last_regs_init(qm); + if (ret) + pci_err(qm->pdev, "Failed to init last word regs!\n"); + + return ret; } static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) @@ -1026,6 +1129,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_qm_stop(qm, QM_NORMAL); err_dev_err_uninit: + hisi_zip_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); err_qm_uninit: @@ -1047,6 +1151,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); + hisi_zip_show_last_regs_uninit(qm); hisi_qm_dev_err_uninit(qm); hisi_zip_qm_uninit(qm); } From 948e35f13181a8ee85ca5b8cf50248746dfc6291 Mon Sep 17 00:00:00 2001 From: Yang Shen Date: Sat, 9 Apr 2022 17:33:09 +0800 Subject: [PATCH 047/116] crypto: hisilicon/sgl - align the hardware sgl dma address The hardware needs aligned sgl dma address. So expend the sgl_size to align 64 bytes. Signed-off-by: Yang Shen Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sgl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index f7efc02b065f..2b6f2281cfd6 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 HiSilicon Limited. */ +#include #include #include #include @@ -64,8 +65,9 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX) return ERR_PTR(-EINVAL); - sgl_size = sizeof(struct acc_hw_sge) * sge_nr + - sizeof(struct hisi_acc_hw_sgl); + sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr + + sizeof(struct hisi_acc_hw_sgl), + HISI_ACC_SGL_ALIGN_SIZE); /* * the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1), From b45b0a12200893732a0b0ec4a6df18521fd976ad Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 11 Apr 2022 11:13:13 +0800 Subject: [PATCH 048/116] crypto: arm64/sm4 - Fix wrong dependency of NEON/CE implementation Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moved the sm4 library implementation from the lib/crypto directory to the crypto directory and configured the name as CRYPTO_SM4. The arm64 SM4 NEON/CE implementation depends on this and needs to be modified uniformly. Fixes: 4f1aef9b806f ("crypto: arm64/sm4 - add ARMv8 NEON implementation") Fixes: 5b33e0ec881c ("crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation") Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4fe7037d2347..ac85682c013c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -57,13 +57,13 @@ config CRYPTO_SM4_ARM64_CE_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_SM4_ARM64_NEON_BLK tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_LIB_SM4 + select CRYPTO_SM4 config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" From bcfcc0a61debc152788a83dc7afaeda2445677e4 Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Tue, 12 Apr 2022 13:00:15 +0530 Subject: [PATCH 049/116] dt-bindings: crypto: ti,sa2ul: Add a new compatible for AM62 Add the AM62 version of sa3ul to the compatible list. Signed-off-by: Jayesh Choudhary Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml index a410d2cedde6..02f47c2e7998 100644 --- a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml +++ b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml @@ -15,6 +15,7 @@ properties: - ti,j721e-sa2ul - ti,am654-sa2ul - ti,am64-sa2ul + - ti,am62-sa3ul reg: maxItems: 1 From 5a6477eaf402a2fa48c66f1dae1fbd9f0a5f096a Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Tue, 12 Apr 2022 13:00:16 +0530 Subject: [PATCH 050/116] crypto: sa2ul - Add the new compatible for AM62 Add the new compatible for am62x in of_match_table. Signed-off-by: Jayesh Choudhary Signed-off-by: Herbert Xu --- drivers/crypto/sa2ul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c index 51b58e57153f..6957a125b447 100644 --- a/drivers/crypto/sa2ul.c +++ b/drivers/crypto/sa2ul.c @@ -2379,6 +2379,7 @@ static const struct of_device_id of_match[] = { { .compatible = "ti,j721e-sa2ul", .data = &am654_match_data, }, { .compatible = "ti,am654-sa2ul", .data = &am654_match_data, }, { .compatible = "ti,am64-sa2ul", .data = &am64_match_data, }, + { .compatible = "ti,am62-sa3ul", .data = &am64_match_data, }, {}, }; MODULE_DEVICE_TABLE(of, of_match); From 753d6770879894de10d74b437ab99ea380f1cad7 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 13 Apr 2022 16:16:05 +0200 Subject: [PATCH 051/116] hwrng: cn10k - Optimize cn10k_rng_read() This function assumes that sizeof(void) is 1 and arithmetic works for void pointers. This is a GNU C extention and may not work with other compilers. Change this by using an u8 pointer. Also move cn10k_read_trng() out of a loop thus saving some cycles. Fixes: 38e9791a0209 ("hwrng: cn10k - Add random number generator support") Signed-off-by: Vladis Dronov Signed-off-by: Herbert Xu --- drivers/char/hw_random/cn10k-rng.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c index 35001c63648b..dd226630b67d 100644 --- a/drivers/char/hw_random/cn10k-rng.c +++ b/drivers/char/hw_random/cn10k-rng.c @@ -90,6 +90,7 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data, { struct cn10k_rng *rng = (struct cn10k_rng *)hwrng->priv; unsigned int size; + u8 *pos = data; int err = 0; u64 value; @@ -102,17 +103,20 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data, while (size >= 8) { cn10k_read_trng(rng, &value); - *((u64 *)data) = (u64)value; + *((u64 *)pos) = value; size -= 8; - data += 8; + pos += 8; } - while (size > 0) { + if (size > 0) { cn10k_read_trng(rng, &value); - *((u8 *)data) = (u8)value; - size--; - data++; + while (size > 0) { + *pos = (u8)value; + value >>= 8; + size--; + pos++; + } } return max - size; From 32547a6aedda132907fcd15cdc8271429609f216 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 13 Apr 2022 16:16:06 +0200 Subject: [PATCH 052/116] hwrng: cn10k - Make check_rng_health() return an error code Currently check_rng_health() returns zero unconditionally. Make it to output an error code and return it. Fixes: 38e9791a0209 ("hwrng: cn10k - Add random number generator support") Signed-off-by: Vladis Dronov Signed-off-by: Herbert Xu --- drivers/char/hw_random/cn10k-rng.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c index dd226630b67d..a01e9307737c 100644 --- a/drivers/char/hw_random/cn10k-rng.c +++ b/drivers/char/hw_random/cn10k-rng.c @@ -31,26 +31,23 @@ struct cn10k_rng { #define PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE 0xc2000b0f -static int reset_rng_health_state(struct cn10k_rng *rng) +static unsigned long reset_rng_health_state(struct cn10k_rng *rng) { struct arm_smccc_res res; /* Send SMC service call to reset EBG health state */ arm_smccc_smc(PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE, 0, 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != 0UL) - return -EIO; - - return 0; + return res.a0; } static int check_rng_health(struct cn10k_rng *rng) { u64 status; - int err; + unsigned long err; /* Skip checking health */ if (!rng->reg_base) - return 0; + return -ENODEV; status = readq(rng->reg_base + RNM_PF_EBG_HEALTH); if (status & BIT_ULL(20)) { @@ -58,7 +55,9 @@ static int check_rng_health(struct cn10k_rng *rng) if (err) { dev_err(&rng->pdev->dev, "HWRNG: Health test failed (status=%llx)\n", status); - dev_err(&rng->pdev->dev, "HWRNG: error during reset\n"); + dev_err(&rng->pdev->dev, "HWRNG: error during reset (error=%lx)\n", + err); + return -EIO; } } return 0; From a77aba3109363ae89711fa2dc3523520c760937f Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Wed, 13 Apr 2022 08:58:35 -0700 Subject: [PATCH 053/116] crypto: ccp - Log when resetting PSP SEV state Currently when the PSP returns a SECURE_DATA_INVALID error on INIT or INIT_EX the driver retries the command once which should reset the PSP's state SEV related state, meaning the PSP will regenerate its keying material. This is logged with a dbg log but given this will change system state this should be logged at a higher priority and with more information. Signed-off-by: Peter Gonda Cc: Tom Lendacky Cc: Brijesh Singh Cc: David Rientjes Cc: Herbert Xu Cc: John Allen Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 6ab93dfd478a..fd928199bf1e 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -435,7 +435,7 @@ static int __sev_platform_init_locked(int *error) * initialization function should succeed by replacing the state * with a reset state. */ - dev_dbg(sev->dev, "SEV: retrying INIT command"); + dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); rc = init_function(&psp_ret); } if (error) From 4ffa1763622ae5752961499588f3f8874315f974 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 13 Apr 2022 19:11:54 +0000 Subject: [PATCH 054/116] crypto: marvell/cesa - ECB does not IV The DES3 ECB has an IV size set but ECB does not need one. Fixes: 4ada483978237 ("crypto: marvell/cesa - add Triple-DES support") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/cipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index b739d3b873dc..c6f2fa753b7c 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -624,7 +624,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = { .decrypt = mv_cesa_ecb_des3_ede_decrypt, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, .base = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "mv-ecb-des3-ede", From 6a71277ce91e4766ebe9a5f6725089c80d043ba2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 Apr 2022 16:37:47 +0800 Subject: [PATCH 055/116] hwrng: mpfs - Enable COMPILE_TEST The dependency on HW_RANDOM is redundant so this patch removes it. As this driver seems to cross-compile just fine we could also enable COMPILE_TEST. Signed-off-by: Herbert Xu Reviewed-by: Conor Dooley Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index c3a9f17bf31c..1245472a0dfe 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -387,7 +387,7 @@ config HW_RANDOM_PIC32 config HW_RANDOM_POLARFIRE_SOC tristate "Microchip PolarFire SoC Random Number Generator support" - depends on HW_RANDOM && POLARFIRE_SOC_SYS_CTRL + depends on POLARFIRE_SOC_SYS_CTRL || COMPILE_TEST help This driver provides kernel-side support for the Random Number Generator hardware found on PolarFire SoC (MPFS). From 0b0002315adfea3d9eb102665a4441727d4d2621 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 16 Apr 2022 18:45:56 +0800 Subject: [PATCH 056/116] crypto: hisilicon/qm - remove unused function declaration The 'hisi_qm_get_hw_version' function is unused, so remove the function declaration. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index e5522eaf88fd..5177858aeea9 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -453,7 +453,6 @@ int hisi_qp_send(struct hisi_qp *qp, const void *msg); int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); void hisi_qm_debug_init(struct hisi_qm *qm); -enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); From fb06eb9727d67eac16e6b6aff35362476389cb88 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 16 Apr 2022 18:45:57 +0800 Subject: [PATCH 057/116] crypto: hisilicon/qm - set function with static These functions 'hisi_qm_create_qp' and 'hisi_qm_set_vft' are not used outside qm.c, so they are marked as static. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 6 ++---- include/linux/hisi_acc_qm.h | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 7f5c3186a895..2a6df6374c91 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2835,7 +2835,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating * qp memory fails. */ -struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) +static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) { struct hisi_qp *qp; int ret; @@ -2853,7 +2853,6 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) return qp; } -EXPORT_SYMBOL_GPL(hisi_qm_create_qp); /** * hisi_qm_release_qp() - Release a qp back to its qm. @@ -3738,7 +3737,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_uninit); * * qm hw v1 does not support this interface. */ -int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) +static int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) { if (!base || !number) return -EINVAL; @@ -3750,7 +3749,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) return qm->ops->get_vft(qm, base, number); } -EXPORT_SYMBOL_GPL(hisi_qm_get_vft); /** * hisi_qm_set_vft() - Set vft to a qm. diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 5177858aeea9..e99b286b50bc 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -445,13 +445,11 @@ int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); -struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); int hisi_qm_stop_qp(struct hisi_qp *qp); void hisi_qm_release_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); int hisi_qm_get_free_qp_num(struct hisi_qm *qm); -int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); From 7982996c5b0812cbf7846deff0e2f5dcbf290129 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 16 Apr 2022 18:45:58 +0800 Subject: [PATCH 058/116] crypto: hisilicon/qm - replace hisi_qm_release_qp() with hisi_qm_free_qps() hisi_qm_free_qps() can release multiple queues in one call, and it is already exported. So, replace hisi_qm_release_qp() with hisi_qm_free_qps() in zip_crypto.c, and do not export hisi_qm_release_qp() outside qm.c. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 3 +-- drivers/crypto/hisilicon/zip/zip_crypto.c | 2 +- include/linux/hisi_acc_qm.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2a6df6374c91..286550086c77 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -2860,7 +2860,7 @@ static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) * * This function releases the resource of a qp. */ -void hisi_qm_release_qp(struct hisi_qp *qp) +static void hisi_qm_release_qp(struct hisi_qp *qp) { struct hisi_qm *qm = qp->qm; @@ -2878,7 +2878,6 @@ void hisi_qm_release_qp(struct hisi_qp *qp) qm_pm_put_sync(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_release_qp); static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) { diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9520a4113c81..67869513e48c 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -521,7 +521,7 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) { hisi_qm_stop_qp(ctx->qp); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } static const struct hisi_zip_sqe_ops hisi_zip_ops_v1 = { diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index e99b286b50bc..d2ccb11071c5 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -447,7 +447,6 @@ int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); int hisi_qm_stop_qp(struct hisi_qp *qp); -void hisi_qm_release_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); int hisi_qm_get_free_qp_num(struct hisi_qm *qm); void hisi_qm_debug_init(struct hisi_qm *qm); From b0c42232fce499ba96fbf2c5ebd2368efeb6597e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 16 Apr 2022 18:45:59 +0800 Subject: [PATCH 059/116] crypto: hisilicon/qm - remove hisi_qm_get_free_qp_num() hisi_qm_get_free_qp_num() is to get the free queue number on the function. It is a simple function and is only called by hisi_qm_get_available_instances(). This patch modifies to get the free queue directly in hisi_qm_get_available_instances(), and remove hisi_qm_get_free_qp_num(). Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 28 +++++++++------------------- include/linux/hisi_acc_qm.h | 1 - 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 286550086c77..b4ca2eb034d7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3226,9 +3226,17 @@ static void qm_qp_event_notifier(struct hisi_qp *qp) wake_up_interruptible(&qp->uacce_q->wait); } + /* This function returns free number of qp in qm. */ static int hisi_qm_get_available_instances(struct uacce_device *uacce) { - return hisi_qm_get_free_qp_num(uacce->priv); + struct hisi_qm *qm = uacce->priv; + int ret; + + down_read(&qm->qps_lock); + ret = qm->qp_num - qm->qp_in_used; + up_read(&qm->qps_lock); + + return ret; } static void hisi_qm_set_hw_reset(struct hisi_qm *qm, int offset) @@ -3540,24 +3548,6 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list) } EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish); -/** - * hisi_qm_get_free_qp_num() - Get free number of qp in qm. - * @qm: The qm which want to get free qp. - * - * This function return free number of qp in qm. - */ -int hisi_qm_get_free_qp_num(struct hisi_qm *qm) -{ - int ret; - - down_read(&qm->qps_lock); - ret = qm->qp_num - qm->qp_in_used; - up_read(&qm->qps_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num); - static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num) { struct device *dev = &qm->pdev->dev; diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index d2ccb11071c5..6cabafffd0dd 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -448,7 +448,6 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); int hisi_qm_stop_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); -int hisi_qm_get_free_qp_num(struct hisi_qm *qm); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs); From c6d3ffae0d3229e06097f2790f459c96fca5e367 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Apr 2022 17:42:36 +0800 Subject: [PATCH 060/116] Revert "hwrng: mpfs - Enable COMPILE_TEST" This reverts commit 6a71277ce91e4766ebe9a5f6725089c80d043ba2. The underlying option POLARFIRE_SOC_SYS_CTRL already supports COMPILE_TEST so there is no need for this. What's more, if we force this option on without the underlying option it fails to build. Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 1245472a0dfe..c3a9f17bf31c 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -387,7 +387,7 @@ config HW_RANDOM_PIC32 config HW_RANDOM_POLARFIRE_SOC tristate "Microchip PolarFire SoC Random Number Generator support" - depends on POLARFIRE_SOC_SYS_CTRL || COMPILE_TEST + depends on HW_RANDOM && POLARFIRE_SOC_SYS_CTRL help This driver provides kernel-side support for the Random Number Generator hardware found on PolarFire SoC (MPFS). From 05def5cacfa0bd5ba380116046747da07ff5bd78 Mon Sep 17 00:00:00 2001 From: Jacky Li Date: Thu, 14 Apr 2022 16:23:25 +0000 Subject: [PATCH 061/116] crypto: ccp - Fix the INIT_EX data file open failure There are 2 common cases when INIT_EX data file might not be opened successfully and fail the sev initialization: 1. In user namespaces, normal user tasks (e.g. VMM) can change their current->fs->root to point to arbitrary directories. While init_ex_path is provided as a module param related to root file system. Solution: use the root directory of init_task to avoid accessing the wrong file. 2. Normal user tasks (e.g. VMM) don't have the privilege to access the INIT_EX data file. Solution: open the file as root and restore permissions immediately. Fixes: 3d725965f836 ("crypto: ccp - Add SEV_INIT_EX support") Signed-off-by: Jacky Li Reviewed-by: Peter Gonda Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index fd928199bf1e..799b476fc3e8 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -170,6 +171,31 @@ static void *sev_fw_alloc(unsigned long len) return page_address(page); } +static struct file *open_file_as_root(const char *filename, int flags, umode_t mode) +{ + struct file *fp; + struct path root; + struct cred *cred; + const struct cred *old_cred; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + + cred = prepare_creds(); + if (!cred) + return ERR_PTR(-ENOMEM); + cred->fsuid = GLOBAL_ROOT_UID; + old_cred = override_creds(cred); + + fp = file_open_root(&root, filename, flags, mode); + path_put(&root); + + revert_creds(old_cred); + + return fp; +} + static int sev_read_init_ex_file(void) { struct sev_device *sev = psp_master->sev_data; @@ -181,7 +207,7 @@ static int sev_read_init_ex_file(void) if (!sev_init_ex_buffer) return -EOPNOTSUPP; - fp = filp_open(init_ex_path, O_RDONLY, 0); + fp = open_file_as_root(init_ex_path, O_RDONLY, 0); if (IS_ERR(fp)) { int ret = PTR_ERR(fp); @@ -217,7 +243,7 @@ static void sev_write_init_ex_file(void) if (!sev_init_ex_buffer) return; - fp = filp_open(init_ex_path, O_CREAT | O_WRONLY, 0600); + fp = open_file_as_root(init_ex_path, O_CREAT | O_WRONLY, 0600); if (IS_ERR(fp)) { dev_err(sev->dev, "SEV: could not open file for write, error %ld\n", From cca806307311bec150268646572ef6a3548ac295 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Mon, 18 Apr 2022 01:57:00 +0000 Subject: [PATCH 062/116] crypto: keembay - Make use of devm helper function devm_platform_ioremap_resource() Use the devm_platform_ioremap_resource() helper instead of calling platform_get_resource() and devm_ioremap_resource() separately.Make the code simpler without functional changes. Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Herbert Xu --- drivers/crypto/keembay/keembay-ocs-aes-core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/crypto/keembay/keembay-ocs-aes-core.c b/drivers/crypto/keembay/keembay-ocs-aes-core.c index e2a39fdaf623..9953f5590ac4 100644 --- a/drivers/crypto/keembay/keembay-ocs-aes-core.c +++ b/drivers/crypto/keembay/keembay-ocs-aes-core.c @@ -1598,7 +1598,6 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ocs_aes_dev *aes_dev; - struct resource *aes_mem; int rc; aes_dev = devm_kzalloc(dev, sizeof(*aes_dev), GFP_KERNEL); @@ -1616,13 +1615,7 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev) } /* Get base register address. */ - aes_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!aes_mem) { - dev_err(dev, "Could not retrieve io mem resource\n"); - return -ENODEV; - } - - aes_dev->base_reg = devm_ioremap_resource(&pdev->dev, aes_mem); + aes_dev->base_reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(aes_dev->base_reg)) return PTR_ERR(aes_dev->base_reg); From ee74fdf0ca74e6a65716400b403285686133a9f8 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Mon, 18 Apr 2022 11:05:37 +0000 Subject: [PATCH 063/116] crypto: sun8i-ss - using pm_runtime_resume_and_get instead of pm_runtime_get_sync Using pm_runtime_resume_and_get is more appropriate for simplifing code Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c index 246a6782674c..dd677e9ed06f 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c @@ -112,11 +112,9 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, goto err_iv; } - err = pm_runtime_get_sync(ss->dev); - if (err < 0) { - pm_runtime_put_noidle(ss->dev); + err = pm_runtime_resume_and_get(ss->dev); + if (err < 0) goto err_pm; - } err = 0; mutex_lock(&ss->mlock); From d3bae86698720f6fc2ca07d3850ec3167a11ec2b Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Wed, 20 Apr 2022 03:02:18 +0000 Subject: [PATCH 064/116] crypto: sun8i-ce - using pm_runtime_resume_and_get instead of pm_runtime_get_sync Using pm_runtime_resume_and_get() to replace pm_runtime_get_sync and pm_runtime_put_noidle. This change is just to simplify the code, no actual functional changes. Reported-by: Zeal Robot Signed-off-by: Minghao Chi Tested-by: Corentin Labbe Acked-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c index b3a9bbfb8831..b3cc43ea6c8a 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c @@ -108,11 +108,9 @@ int sun8i_ce_prng_generate(struct crypto_rng *tfm, const u8 *src, goto err_dst; } - err = pm_runtime_get_sync(ce->dev); - if (err < 0) { - pm_runtime_put_noidle(ce->dev); + err = pm_runtime_resume_and_get(ce->dev); + if (err < 0) goto err_pm; - } mutex_lock(&ce->rnglock); chan = &ce->chanlist[flow]; From 4ee4cdad368a26de3967f2975806a9ee2fa245df Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 Apr 2022 09:06:01 -0300 Subject: [PATCH 065/116] crypto: caam - fix i.MX6SX entropy delay value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 358ba762d9f1 ("crypto: caam - enable prediction resistance in HRWNG") the following CAAM errors can be seen on i.MX6SX: caam_jr 2101000.jr: 20003c5b: CCB: desc idx 60: RNG: Hardware error hwrng: no data available This error is due to an incorrect entropy delay for i.MX6SX. Fix it by increasing the minimum entropy delay for i.MX6SX as done in U-Boot: https://patchwork.ozlabs.org/project/uboot/patch/20220415111049.2565744-1-gaurav.jain@nxp.com/ As explained in the U-Boot patch: "RNG self tests are run to determine the correct entropy delay. Such tests are executed with different voltages and temperatures to identify the worst case value for the entropy delay. For i.MX6SX, it was determined that after adding a margin value of 1000 the minimum entropy delay should be at least 12000." Cc: Fixes: 358ba762d9f1 ("crypto: caam - enable prediction resistance in HRWNG") Signed-off-by: Fabio Estevam Reviewed-by: Horia Geantă Reviewed-by: Vabhav Sharma Reviewed-by: Gaurav Jain Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index ca0361b2dbb0..f87aa2169e5f 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major, } #endif +static bool needs_entropy_delay_adjustment(void) +{ + if (of_machine_is_compatible("fsl,imx6sx")) + return true; + return false; +} + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -855,6 +862,8 @@ static int caam_probe(struct platform_device *pdev) * Also, if a handle was instantiated, do not change * the TRNG parameters. */ + if (needs_entropy_delay_adjustment()) + ent_delay = 12000; if (!(ctrlpriv->rng4_sh_init || inst_handles)) { dev_info(dev, "Entropy delay = %u\n", @@ -871,6 +880,15 @@ static int caam_probe(struct platform_device *pdev) */ ret = instantiate_rng(dev, inst_handles, gen_sk); + /* + * Entropy delay is determined via TRNG characterization. + * TRNG characterization is run across different voltages + * and temperatures. + * If worst case value for ent_dly is identified, + * the loop can be skipped for that platform. + */ + if (needs_entropy_delay_adjustment()) + break; if (ret == -EAGAIN) /* * if here, the loop will rerun, From 7cc7ab73f83ee6d50dc9536bc3355495d8600fad Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Thu, 21 Apr 2022 20:25:10 +0300 Subject: [PATCH 066/116] crypto: ecrdsa - Fix incorrect use of vli_cmp Correctly compare values that shall be greater-or-equal and not just greater. Fixes: 0d7a78643f69 ("crypto: ecrdsa - add EC-RDSA (GOST 34.10) algorithm") Cc: Signed-off-by: Vitaly Chikunov Signed-off-by: Herbert Xu --- crypto/ecrdsa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index b32ffcaad9ad..f3c6b5e15e75 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 1: verify that 0 < r < q, 0 < s < q */ if (vli_is_zero(r, ndigits) || - vli_cmp(r, ctx->curve->n, ndigits) == 1 || + vli_cmp(r, ctx->curve->n, ndigits) >= 0 || vli_is_zero(s, ndigits) || - vli_cmp(s, ctx->curve->n, ndigits) == 1) + vli_cmp(s, ctx->curve->n, ndigits) >= 0) return -EKEYREJECTED; /* Step 2: calculate hash (h) of the message (passed as input) */ /* Step 3: calculate e = h \mod q */ vli_from_le64(e, digest, ndigits); - if (vli_cmp(e, ctx->curve->n, ndigits) == 1) + if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) vli_sub(e, e, ctx->curve->n, ndigits); if (vli_is_zero(e, ndigits)) e[0] = 1; @@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, ctx->curve); - if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) + if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); /* Step 7: if R == r signature is valid */ From fd463e980f00a0fc7d7e462bd336efb819c04f9e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Sun, 24 Apr 2022 16:50:31 +0800 Subject: [PATCH 067/116] crypto: qat - Fix unsigned function returning negative constant The function qat_uclo_check_image_compat has an unsigned return type, but returns a negative constant to indicate an error condition. So we change unsigned to int. Signed-off-by: Haowen Bai Acked-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_uclo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 6356402a2c9e..4b6f37d6e85b 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -519,7 +519,7 @@ qat_uclo_map_chunk(char *buf, struct icp_qat_uof_filehdr *file_hdr, return NULL; } -static unsigned int +static int qat_uclo_check_image_compat(struct icp_qat_uof_encap_obj *encap_uof_obj, struct icp_qat_uof_image *image) { From 11aeb93089ce0bf12ef79fe4d05fde075275e125 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 24 Apr 2022 19:11:56 +0100 Subject: [PATCH 068/116] hwrng: optee - remove redundant initialization to variable rng_size Variable rng_size is being initialized with a value that is never read, the variable is being re-assigned later on. The initialization is redundant and can be removed. Cleans up cppcheck warning: Variable 'rng_size' is assigned a value that is never used. Signed-off-by: Colin Ian King Reviewed-by: Sumit Garg Signed-off-by: Herbert Xu --- drivers/char/hw_random/optee-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/optee-rng.c b/drivers/char/hw_random/optee-rng.c index a948c0727b2b..96b5d546d136 100644 --- a/drivers/char/hw_random/optee-rng.c +++ b/drivers/char/hw_random/optee-rng.c @@ -115,7 +115,7 @@ static size_t get_optee_rng_data(struct optee_rng_private *pvt_data, static int optee_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) { struct optee_rng_private *pvt_data = to_optee_rng_private(rng); - size_t read = 0, rng_size = 0; + size_t read = 0, rng_size; int timeout = 1; u8 *data = buf; From fa048cd1ef5ef86bdd42af0180dfd9d3f7af81e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 21 Apr 2022 15:44:57 +0200 Subject: [PATCH 069/116] crypto: atmel-sha204a - Add support for ATSHA204 cryptochip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ATSHA204 is predecessor of ATSHA204A which supports less features and some of them are slightly different. Introduce a new compatible string for ATSHA204 cryptochip "atmel,atsha204". Current version of Linux kernel driver atmel-sha204a.c implements only hw random number generator which is same in both ATSHA204 and ATSHA204A cryptochips. So driver already supports also ATSHA204 hw generator, so just simply extends list of compatible strings. Signed-off-by: Pali Rohár Acked-by: Rob Herring Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/trivial-devices.yaml | 4 +++- drivers/crypto/atmel-sha204a.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 550a2e5c9e05..98b45773432b 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -47,7 +47,9 @@ properties: - at,24c08 # i2c trusted platform module (TPM) - atmel,at97sc3204t - # i2c h/w symmetric crypto module + # ATSHA204 - i2c h/w symmetric crypto module + - atmel,atsha204 + # ATSHA204A - i2c h/w symmetric crypto module - atmel,atsha204a # i2c h/w elliptic curve crypto module - atmel,atecc508a diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index c96c14e7dab1..3a03f4aaf100 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -132,12 +132,14 @@ static int atmel_sha204a_remove(struct i2c_client *client) } static const struct of_device_id atmel_sha204a_dt_ids[] = { + { .compatible = "atmel,atsha204", }, { .compatible = "atmel,atsha204a", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, atmel_sha204a_dt_ids); static const struct i2c_device_id atmel_sha204a_id[] = { + { "atsha204", 0 }, { "atsha204a", 0 }, { /* sentinel */ } }; From fdbf5e46e7af5b13ee9df35f2ea62484591c4341 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 26 Apr 2022 19:53:58 +0800 Subject: [PATCH 070/116] crypto: hisilicon/sec - add sm4 generic selection Add sm4 generic selection for fallback tfm in the Kconfig. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index e572f9982d4e..27e1fa912063 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -26,6 +26,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + select CRYPTO_SM4 depends on PCI && PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) From 580c8619698f2c4af4c81abc2a26641d4162d9b3 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Wed, 27 Apr 2022 09:43:51 +0200 Subject: [PATCH 071/116] crypto: inside-secure - Add MODULE_FIRMWARE macros The safexcel module loads firmware so add MODULE_FIRMWARE macros to provide that information via modinfo. Signed-off-by: Juerg Haefliger Acked-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 9ff885d50edf..9b1a158aec29 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1997,3 +1997,12 @@ MODULE_AUTHOR("Igal Liberman "); MODULE_DESCRIPTION("Support for SafeXcel cryptographic engines: EIP97 & EIP197"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); + +MODULE_FIRMWARE("ifpp.bin"); +MODULE_FIRMWARE("ipue.bin"); +MODULE_FIRMWARE("inside-secure/eip197b/ifpp.bin"); +MODULE_FIRMWARE("inside-secure/eip197b/ipue.bin"); +MODULE_FIRMWARE("inside-secure/eip197d/ifpp.bin"); +MODULE_FIRMWARE("inside-secure/eip197d/ipue.bin"); +MODULE_FIRMWARE("inside-secure/eip197_minifw/ifpp.bin"); +MODULE_FIRMWARE("inside-secure/eip197_minifw/ipue.bin"); From 384e9aa77ae6b0575631ea3100563a9fe44b0e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 28 Apr 2022 19:11:45 +0200 Subject: [PATCH 072/116] crypto: atmel-sha204a - Remove useless check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kfree(NULL) is a noop, so there is no win in checking a pointer before kfreeing it. Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 3a03f4aaf100..9f70f4345b12 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -125,8 +125,7 @@ static int atmel_sha204a_remove(struct i2c_client *client) return -EBUSY; } - if (i2c_priv->hwrng.priv) - kfree((void *)i2c_priv->hwrng.priv); + kfree((void *)i2c_priv->hwrng.priv); return 0; } From 57182182317aaadb6548f39eeadfe557a24f6a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 28 Apr 2022 19:11:46 +0200 Subject: [PATCH 073/116] crypto: atmel-sha204a - Suppress duplicate error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning an error value in an i2c remove callback results in an error message being emitted by the i2c core, but otherwise it doesn't make a difference. The device goes away anyhow and the devm cleanups are called. As atmel_sha204a_remove already emits an error message ant the additional error message by the i2c core doesn't add any useful information, change the return value to zero to suppress this error message. Note that after atmel_sha204a_remove() returns *i2c_priv is freed, so there is trouble ahead because atmel_sha204a_rng_done() might be called after that freeing. So make the error message a bit more frightening. Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 9f70f4345b12..50363999b990 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -121,8 +121,8 @@ static int atmel_sha204a_remove(struct i2c_client *client) struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client); if (atomic_read(&i2c_priv->tfm_count)) { - dev_err(&client->dev, "Device is busy\n"); - return -EBUSY; + dev_emerg(&client->dev, "Device is busy, will remove it anyhow\n"); + return 0; } kfree((void *)i2c_priv->hwrng.priv); From 25dfae684031f292034a0f42155090df6309f152 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 29 Apr 2022 13:37:20 +0800 Subject: [PATCH 074/116] hwrng: cn10k - Enable compile testing This patch enables COMPILE_TEST for cn10k. Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index c3a9f17bf31c..b3f2d55dc551 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -540,7 +540,7 @@ config HW_RANDOM_ARM_SMCCC_TRNG config HW_RANDOM_CN10K tristate "Marvell CN10K Random Number Generator support" - depends on HW_RANDOM && PCI && ARM64 + depends on HW_RANDOM && PCI && (ARM64 || (64BIT && COMPILE_TEST)) default HW_RANDOM help This driver provides support for the True Random Number From 0aa6ac7795cab3d8bcf3209d77459d595f4843da Mon Sep 17 00:00:00 2001 From: Meenakshi Aggarwal Date: Fri, 29 Apr 2022 13:48:08 +0200 Subject: [PATCH 075/116] crypto: caam/rng - Add support for PRNG Add support for random number generation using PRNG mode of CAAM and expose the interface through crypto API. According to the RM, the HW implementation of the DRBG follows NIST SP 800-90A specification for DRBG_Hash SHA-256 function Signed-off-by: Meenakshi Aggarwal Reviewed-by: Horia Geant Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 8 ++ drivers/crypto/caam/Makefile | 1 + drivers/crypto/caam/caamprng.c | 235 +++++++++++++++++++++++++++++++++ drivers/crypto/caam/intern.h | 15 +++ drivers/crypto/caam/jr.c | 3 +- 5 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/caam/caamprng.c diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 84ea7cba5ee5..0aa52b612a01 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -151,6 +151,14 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API Selecting this will register the SEC4 hardware rng to the hw_random API for supplying the kernel entropy pool. +config CRYPTO_DEV_FSL_CAAM_PRNG_API + bool "Register Pseudo random number generation implementation with Crypto API" + default y + select CRYPTO_RNG + help + Selecting this will register the SEC hardware prng to + the Crypto API. + endif # CRYPTO_DEV_FSL_CAAM_JR endif # CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 3570286eb9ce..4f9837a8a1ad 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -20,6 +20,7 @@ caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API) += caamprng.o caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o caam-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += qi.o diff --git a/drivers/crypto/caam/caamprng.c b/drivers/crypto/caam/caamprng.c new file mode 100644 index 000000000000..4839e66300a2 --- /dev/null +++ b/drivers/crypto/caam/caamprng.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Driver to expose SEC4 PRNG via crypto RNG API + * + * Copyright 2022 NXP + * + */ + +#include +#include +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "desc_constr.h" +#include "jr.h" +#include "error.h" + +/* + * Length of used descriptors, see caam_init_desc() + */ +#define CAAM_PRNG_MAX_DESC_LEN (CAAM_CMD_SZ + \ + CAAM_CMD_SZ + \ + CAAM_CMD_SZ + CAAM_PTR_SZ_MAX) + +/* prng per-device context */ +struct caam_prng_ctx { + int err; + struct completion done; +}; + +struct caam_prng_alg { + struct rng_alg rng; + bool registered; +}; + +static void caam_prng_done(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + struct caam_prng_ctx *jctx = context; + + jctx->err = err ? caam_jr_strstatus(jrdev, err) : 0; + + complete(&jctx->done); +} + +static u32 *caam_init_reseed_desc(u32 *desc) +{ + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | + OP_ALG_AS_FINALIZE); + + print_hex_dump_debug("prng reseed desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; +} + +static u32 *caam_init_prng_desc(u32 *desc, dma_addr_t dst_dma, u32 len) +{ + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); + /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ + append_fifo_store(desc, dst_dma, + len, FIFOST_TYPE_RNGSTORE); + + print_hex_dump_debug("prng job desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; +} + +static int caam_prng_generate(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen) +{ + struct caam_prng_ctx ctx; + struct device *jrdev; + dma_addr_t dst_dma; + u32 *desc; + u8 *buf; + int ret; + + buf = kzalloc(dlen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + jrdev = caam_jr_alloc(); + ret = PTR_ERR_OR_ZERO(jrdev); + if (ret) { + pr_err("Job Ring Device allocation failed\n"); + kfree(buf); + return ret; + } + + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); + if (!desc) { + ret = -ENOMEM; + goto out1; + } + + dst_dma = dma_map_single(jrdev, buf, dlen, DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { + dev_err(jrdev, "Failed to map destination buffer memory\n"); + ret = -ENOMEM; + goto out; + } + + init_completion(&ctx.done); + ret = caam_jr_enqueue(jrdev, + caam_init_prng_desc(desc, dst_dma, dlen), + caam_prng_done, &ctx); + + if (ret == -EINPROGRESS) { + wait_for_completion(&ctx.done); + ret = ctx.err; + } + + dma_unmap_single(jrdev, dst_dma, dlen, DMA_FROM_DEVICE); + + if (!ret) + memcpy(dst, buf, dlen); +out: + kfree(desc); +out1: + caam_jr_free(jrdev); + kfree(buf); + return ret; +} + +static void caam_prng_exit(struct crypto_tfm *tfm) {} + +static int caam_prng_init(struct crypto_tfm *tfm) +{ + return 0; +} + +static int caam_prng_seed(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) +{ + struct caam_prng_ctx ctx; + struct device *jrdev; + u32 *desc; + int ret; + + if (slen) { + pr_err("Seed length should be zero\n"); + return -EINVAL; + } + + jrdev = caam_jr_alloc(); + ret = PTR_ERR_OR_ZERO(jrdev); + if (ret) { + pr_err("Job Ring Device allocation failed\n"); + return ret; + } + + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); + if (!desc) { + caam_jr_free(jrdev); + return -ENOMEM; + } + + init_completion(&ctx.done); + ret = caam_jr_enqueue(jrdev, + caam_init_reseed_desc(desc), + caam_prng_done, &ctx); + + if (ret == -EINPROGRESS) { + wait_for_completion(&ctx.done); + ret = ctx.err; + } + + kfree(desc); + caam_jr_free(jrdev); + return ret; +} + +static struct caam_prng_alg caam_prng_alg = { + .rng = { + .generate = caam_prng_generate, + .seed = caam_prng_seed, + .seedsize = 0, + .base = { + .cra_name = "stdrng", + .cra_driver_name = "prng-caam", + .cra_priority = 500, + .cra_ctxsize = sizeof(struct caam_prng_ctx), + .cra_module = THIS_MODULE, + .cra_init = caam_prng_init, + .cra_exit = caam_prng_exit, + }, + } +}; + +void caam_prng_unregister(void *data) +{ + if (caam_prng_alg.registered) + crypto_unregister_rng(&caam_prng_alg.rng); +} + +int caam_prng_register(struct device *ctrldev) +{ + struct caam_drv_private *priv = dev_get_drvdata(ctrldev); + u32 rng_inst; + int ret = 0; + + /* Check for available RNG blocks before registration */ + if (priv->era < 10) + rng_inst = (rd_reg32(&priv->jr[0]->perfmon.cha_num_ls) & + CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT; + else + rng_inst = rd_reg32(&priv->jr[0]->vreg.rng) & CHA_VER_NUM_MASK; + + if (!rng_inst) { + dev_dbg(ctrldev, "RNG block is not available... skipping registering algorithm\n"); + return ret; + } + + ret = crypto_register_rng(&caam_prng_alg.rng); + if (ret) { + dev_err(ctrldev, + "couldn't register rng crypto alg: %d\n", + ret); + return ret; + } + + caam_prng_alg.registered = true; + + dev_info(ctrldev, + "rng crypto API alg registered %s\n", caam_prng_alg.rng.base.cra_driver_name); + + return 0; +} diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 7d45b21bd55a..c2f51365df1b 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -185,6 +185,21 @@ static inline void caam_rng_exit(struct device *dev) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */ +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API + +int caam_prng_register(struct device *dev); +void caam_prng_unregister(void *data); + +#else + +static inline int caam_prng_register(struct device *dev) +{ + return 0; +} + +static inline void caam_prng_unregister(void *data) {} +#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API */ + #ifdef CONFIG_CAAM_QI int caam_qi_algapi_init(struct device *dev); diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 7f2b1101f567..724fdec18bf9 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -39,6 +39,7 @@ static void register_algs(struct caam_drv_private_jr *jrpriv, caam_algapi_hash_init(dev); caam_pkc_init(dev); jrpriv->hwrng = !caam_rng_init(dev); + caam_prng_register(dev); caam_qi_algapi_init(dev); algs_unlock: @@ -53,7 +54,7 @@ static void unregister_algs(void) goto algs_unlock; caam_qi_algapi_exit(); - + caam_prng_unregister(NULL); caam_pkc_exit(); caam_algapi_hash_exit(); caam_algapi_exit(); From 59f71498c7ff591824863be55b0eff5aae743947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Apr 2022 16:03:49 +0200 Subject: [PATCH 076/116] crypto: atmel-i2c - Simplify return code in probe function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no semantical change introduced by this change. Signed-off-by: Uwe Kleine-König Reviewed-by: Claudiu Beznea Signed-off-by: Herbert Xu --- drivers/crypto/atmel-i2c.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 6fd3e969211d..384865ef96ce 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -364,11 +364,7 @@ int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) i2c_set_clientdata(client, i2c_priv); - ret = device_sanity_check(client); - if (ret) - return ret; - - return 0; + return device_sanity_check(client); } EXPORT_SYMBOL(atmel_i2c_probe); From 0a2f4b5785ca5e9c5bc2d4e59183e016096ee889 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 30 Apr 2022 16:01:46 +0900 Subject: [PATCH 077/116] crypto: atmel - Avoid flush_scheduled_work() usage Flushing system-wide workqueues is dangerous and will be forbidden. Replace system_wq with local atmel_wq. If CONFIG_CRYPTO_DEV_ATMEL_{I2C,ECC,SHA204A}=y, the ordering in Makefile guarantees that module_init() for atmel-i2c runs before module_init() for atmel-ecc and atmel-sha204a runs. Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Signed-off-by: Herbert Xu --- drivers/crypto/Makefile | 1 + drivers/crypto/atmel-ecc.c | 2 +- drivers/crypto/atmel-i2c.c | 24 +++++++++++++++++++++++- drivers/crypto/atmel-i2c.h | 1 + drivers/crypto/atmel-sha204a.c | 2 +- 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 0a4fff23d272..f81703a86b98 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ALLWINNER) += allwinner/ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o +# __init ordering requires atmel-i2c being before atmel-ecc and atmel-sha204a. obj-$(CONFIG_CRYPTO_DEV_ATMEL_I2C) += atmel-i2c.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA204A) += atmel-sha204a.o diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 333fbefbbccb..59a57279e77b 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -398,7 +398,7 @@ static int __init atmel_ecc_init(void) static void __exit atmel_ecc_exit(void) { - flush_scheduled_work(); + atmel_i2c_flush_queue(); i2c_del_driver(&atmel_ecc_driver); } diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 384865ef96ce..81ce09bedda8 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -263,6 +263,8 @@ static void atmel_i2c_work_handler(struct work_struct *work) work_data->cbk(work_data, work_data->areq, status); } +static struct workqueue_struct *atmel_wq; + void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data, void (*cbk)(struct atmel_i2c_work_data *work_data, void *areq, int status), @@ -272,10 +274,16 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data, work_data->areq = areq; INIT_WORK(&work_data->work, atmel_i2c_work_handler); - schedule_work(&work_data->work); + queue_work(atmel_wq, &work_data->work); } EXPORT_SYMBOL(atmel_i2c_enqueue); +void atmel_i2c_flush_queue(void) +{ + flush_workqueue(atmel_wq); +} +EXPORT_SYMBOL(atmel_i2c_flush_queue); + static inline size_t atmel_i2c_wake_token_sz(u32 bus_clk_rate) { u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC); @@ -368,6 +376,20 @@ int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } EXPORT_SYMBOL(atmel_i2c_probe); +static int __init atmel_i2c_init(void) +{ + atmel_wq = alloc_workqueue("atmel_wq", 0, 0); + return atmel_wq ? 0 : -ENOMEM; +} + +static void __exit atmel_i2c_exit(void) +{ + destroy_workqueue(atmel_wq); +} + +module_init(atmel_i2c_init); +module_exit(atmel_i2c_exit); + MODULE_AUTHOR("Tudor Ambarus "); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 63b97b104f16..48929efe2a5b 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -173,6 +173,7 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data, void (*cbk)(struct atmel_i2c_work_data *work_data, void *areq, int status), void *areq); +void atmel_i2c_flush_queue(void); int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd); diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 50363999b990..e4087bdd2475 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -160,7 +160,7 @@ static int __init atmel_sha204a_init(void) static void __exit atmel_sha204a_exit(void) { - flush_scheduled_work(); + atmel_i2c_flush_queue(); i2c_del_driver(&atmel_sha204a_driver); } From b52455a73db95ef90fd3c2be84db77b55be43f46 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 May 2022 22:07:49 +0900 Subject: [PATCH 078/116] crypto: vmx - Align the short log with Makefile cleanups I notieced the log is not properly aligned: PERL drivers/crypto/vmx/aesp8-ppc.S CC [M] fs/xfs/xfs_reflink.o PERL drivers/crypto/vmx/ghashp8-ppc.S CC [M] drivers/crypto/vmx/aes.o Add some spaces after 'PERL'. While I was here, I cleaned up the Makefile: - Merge the two similar rules - Remove redundant 'clean-files' (Having 'targets' is enough) - Move the flavour into the build command This still avoids the build failures fixed by commit 4ee812f6143d ("crypto: vmx - Avoid weird build failures"). Signed-off-by: Masahiro Yamada Reported-by: kernel test robot Signed-off-by: Herbert Xu --- drivers/crypto/vmx/Makefile | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index 709670d2b553..df93ba63b1cd 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -2,21 +2,10 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override flavour := linux-ppc64le -else -override flavour := linux-ppc64 -endif - -quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) $(flavour) > $(@) +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $< $(if $(CONFIG_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ targets += aesp8-ppc.S ghashp8-ppc.S -$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE +$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perl) - -$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE - $(call if_changed,perl) - -clean-files := aesp8-ppc.S ghashp8-ppc.S From e4d1293cb19b4a84db74614a8ae94351a619d78a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 May 2022 14:22:43 +0900 Subject: [PATCH 079/116] crypto: vmx - Fix build error When I refactored this Makefile, I accidentally changed the CONFIG option. Fixes: b52455a73db9 ("crypto: vmx - Align the short log with Makefile cleanups") Reported-by: kernel test robot Signed-off-by: Masahiro Yamada Signed-off-by: Herbert Xu --- drivers/crypto/vmx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index df93ba63b1cd..2560cfea1dec 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $< $(if $(CONFIG_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ + cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ targets += aesp8-ppc.S ghashp8-ppc.S From 282ee0716f618655d3b2a431498966a4bf797f4c Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:11 +0000 Subject: [PATCH 080/116] crypto: sun8i-ce - Fix minor style issue This patch remove a double blank line. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 35e3cadccac2..01d032e08825 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -398,7 +398,6 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm) sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) + crypto_skcipher_reqsize(op->fallback_tfm); - dev_info(op->ce->dev, "Fallback for %s is %s\n", crypto_tfm_alg_driver_name(&sktfm->base), crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm))); From 22f7c2f8cfdac70a4414b66ce6ffa270fcd15aa7 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:12 +0000 Subject: [PATCH 081/116] crypto: sun8i-ce - do not allocate memory when handling requests Instead of allocate memory on each requests, it is easier to pre-allocate buffer for IV. This made error path easier. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ce/sun8i-ce-cipher.c | 29 ++++++------------- .../crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 20 ++++++++++--- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 8 ++--- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 01d032e08825..0b1ce58bdeb9 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -152,23 +152,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { rctx->ivlen = ivsize; - rctx->bounce_iv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA); - if (!rctx->bounce_iv) { - err = -ENOMEM; - goto theend_key; - } if (rctx->op_dir & CE_DECRYPTION) { - rctx->backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!rctx->backup_iv) { - err = -ENOMEM; - goto theend_key; - } offset = areq->cryptlen - ivsize; - scatterwalk_map_and_copy(rctx->backup_iv, areq->src, + scatterwalk_map_and_copy(chan->backup_iv, areq->src, offset, ivsize, 0); } - memcpy(rctx->bounce_iv, areq->iv, ivsize); - rctx->addr_iv = dma_map_single(ce->dev, rctx->bounce_iv, rctx->ivlen, + memcpy(chan->bounce_iv, areq->iv, ivsize); + rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, rctx->ivlen, DMA_TO_DEVICE); if (dma_mapping_error(ce->dev, rctx->addr_iv)) { dev_err(ce->dev, "Cannot DMA MAP IV\n"); @@ -257,16 +247,15 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); offset = areq->cryptlen - ivsize; if (rctx->op_dir & CE_DECRYPTION) { - memcpy(areq->iv, rctx->backup_iv, ivsize); - kfree_sensitive(rctx->backup_iv); + memcpy(areq->iv, chan->backup_iv, ivsize); + memzero_explicit(chan->backup_iv, ivsize); } else { scatterwalk_map_and_copy(areq->iv, areq->dst, offset, ivsize, 0); } - kfree(rctx->bounce_iv); + memzero_explicit(chan->bounce_iv, ivsize); } -theend_key: dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE); theend: @@ -322,13 +311,13 @@ static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_r dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); offset = areq->cryptlen - ivsize; if (rctx->op_dir & CE_DECRYPTION) { - memcpy(areq->iv, rctx->backup_iv, ivsize); - kfree_sensitive(rctx->backup_iv); + memcpy(areq->iv, chan->backup_iv, ivsize); + memzero_explicit(chan->backup_iv, ivsize); } else { scatterwalk_map_and_copy(areq->iv, areq->dst, offset, ivsize, 0); } - kfree(rctx->bounce_iv); + memzero_explicit(chan->bounce_iv, ivsize); } dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE); diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index d8623c7e0d1d..eeaa856b8f81 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -283,7 +283,7 @@ static struct sun8i_ce_alg_template ce_algs[] = { .cra_priority = 400, .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | - CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx), .cra_module = THIS_MODULE, @@ -310,7 +310,7 @@ static struct sun8i_ce_alg_template ce_algs[] = { .cra_priority = 400, .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | - CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx), .cra_module = THIS_MODULE, @@ -336,7 +336,7 @@ static struct sun8i_ce_alg_template ce_algs[] = { .cra_priority = 400, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | - CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx), .cra_module = THIS_MODULE, @@ -363,7 +363,7 @@ static struct sun8i_ce_alg_template ce_algs[] = { .cra_priority = 400, .cra_blocksize = DES3_EDE_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | - CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx), .cra_module = THIS_MODULE, @@ -673,6 +673,18 @@ static int sun8i_ce_allocate_chanlist(struct sun8i_ce_dev *ce) err = -ENOMEM; goto error_engine; } + ce->chanlist[i].bounce_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ce->chanlist[i].bounce_iv) { + err = -ENOMEM; + goto error_engine; + } + ce->chanlist[i].backup_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE, + GFP_KERNEL); + if (!ce->chanlist[i].backup_iv) { + err = -ENOMEM; + goto error_engine; + } } return 0; error_engine: diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 624a5926f21f..23613a0ec9b0 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -186,6 +186,8 @@ struct ce_task { * @status: set to 1 by interrupt if task is done * @t_phy: Physical address of task * @tl: pointer to the current ce_task for this flow + * @backup_iv: buffer which contain the next IV to store + * @bounce_iv: buffer which contain the IV * @stat_req: number of request done by this flow */ struct sun8i_ce_flow { @@ -195,6 +197,8 @@ struct sun8i_ce_flow { dma_addr_t t_phy; int timeout; struct ce_task *tl; + void *backup_iv; + void *bounce_iv; #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG unsigned long stat_req; #endif @@ -241,8 +245,6 @@ struct sun8i_ce_dev { * struct sun8i_cipher_req_ctx - context for a skcipher request * @op_dir: direction (encrypt vs decrypt) for this request * @flow: the flow to use for this request - * @backup_iv: buffer which contain the next IV to store - * @bounce_iv: buffer which contain the IV * @ivlen: size of bounce_iv * @nr_sgs: The number of source SG (as given by dma_map_sg()) * @nr_sgd: The number of destination SG (as given by dma_map_sg()) @@ -253,8 +255,6 @@ struct sun8i_ce_dev { struct sun8i_cipher_req_ctx { u32 op_dir; int flow; - void *backup_iv; - void *bounce_iv; unsigned int ivlen; int nr_sgs; int nr_sgd; From 22d03a0aad0ecf5ab871a83493f593e2127961a1 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:13 +0000 Subject: [PATCH 082/116] crypto: sun4i-ss - do not allocate backup IV on requests Instead of allocate memory on each requests, it is easier to pre-allocate buffer for backup IV. This made error path easier. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun4i-ss/sun4i-ss-cipher.c | 22 +++++++------------ drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h | 1 + 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index dec79fa3ebaf..10fe9f73a5fb 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -20,7 +20,6 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq); u32 mode = ctx->mode; - void *backup_iv = NULL; /* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */ u32 rx_cnt = SS_RX_DEFAULT; u32 tx_cnt = 0; @@ -48,10 +47,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) } if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { - backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!backup_iv) - return -ENOMEM; - scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + scatterwalk_map_and_copy(ctx->backup_iv, areq->src, + areq->cryptlen - ivsize, ivsize, 0); } if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { @@ -134,8 +131,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) if (areq->iv) { if (mode & SS_DECRYPTION) { - memcpy(areq->iv, backup_iv, ivsize); - kfree_sensitive(backup_iv); + memcpy(areq->iv, ctx->backup_iv, ivsize); + memzero_explicit(ctx->backup_iv, ivsize); } else { scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, ivsize, 0); @@ -199,7 +196,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) unsigned int ileft = areq->cryptlen; unsigned int oleft = areq->cryptlen; unsigned int todo; - void *backup_iv = NULL; struct sg_mapping_iter mi, mo; unsigned long pi = 0, po = 0; /* progress for in and out */ bool miter_err; @@ -244,10 +240,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) return sun4i_ss_cipher_poll_fallback(areq); if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { - backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!backup_iv) - return -ENOMEM; - scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + scatterwalk_map_and_copy(ctx->backup_iv, areq->src, + areq->cryptlen - ivsize, ivsize, 0); } if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) { @@ -384,8 +378,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) } if (areq->iv) { if (mode & SS_DECRYPTION) { - memcpy(areq->iv, backup_iv, ivsize); - kfree_sensitive(backup_iv); + memcpy(areq->iv, ctx->backup_iv, ivsize); + memzero_explicit(ctx->backup_iv, ivsize); } else { scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, ivsize, 0); diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h index 0fee6f4e2d90..ba59c7a48825 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h @@ -183,6 +183,7 @@ struct sun4i_tfm_ctx { struct sun4i_cipher_req_ctx { u32 mode; + u8 backup_iv[AES_BLOCK_SIZE]; struct skcipher_request fallback_req; // keep at the end }; From 359e893e8af456be2fefabe851716237df289cbf Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:14 +0000 Subject: [PATCH 083/116] crypto: sun8i-ss - rework handling of IV sun8i-ss fail handling IVs when doing decryption of multiple SGs in-place. It should backup the last block of each SG source for using it later as IVs. In the same time remove allocation on requests path for storing all IVs. Fixes: f08fcced6d00 ("crypto: allwinner - Add sun8i-ss cryptographic offloader") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ss/sun8i-ss-cipher.c | 115 ++++++++++++------ .../crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 30 +++-- drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 14 ++- 3 files changed, 107 insertions(+), 52 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 554e400d41ca..70e2e6e37389 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -93,6 +93,68 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq) return err; } +static int sun8i_ss_setup_ivs(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ss_dev *ss = op->ss; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct scatterlist *sg = areq->src; + unsigned int todo, offset; + unsigned int len = areq->cryptlen; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; + int i = 0; + u32 a; + int err; + + rctx->ivlen = ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + offset = areq->cryptlen - ivsize; + scatterwalk_map_and_copy(sf->biv, areq->src, offset, + ivsize, 0); + } + + /* we need to copy all IVs from source in case DMA is bi-directionnal */ + while (sg && len) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } + if (i == 0) + memcpy(sf->iv[0], areq->iv, ivsize); + a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE); + if (dma_mapping_error(ss->dev, a)) { + memzero_explicit(sf->iv[i], ivsize); + dev_err(ss->dev, "Cannot DMA MAP IV\n"); + err = -EFAULT; + goto dma_iv_error; + } + rctx->p_iv[i] = a; + /* we need to setup all others IVs only in the decrypt way */ + if (rctx->op_dir & SS_ENCRYPTION) + return 0; + todo = min(len, sg_dma_len(sg)); + len -= todo; + i++; + if (i < MAX_SG) { + offset = sg->length - ivsize; + scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0); + } + rctx->niv = i; + sg = sg_next(sg); + } + + return 0; +dma_iv_error: + i--; + while (i >= 0) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); + memzero_explicit(sf->iv[i], ivsize); + } + return err; +} + static int sun8i_ss_cipher(struct skcipher_request *areq) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); @@ -101,9 +163,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct sun8i_ss_alg_template *algt; + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; struct scatterlist *sg; unsigned int todo, len, offset, ivsize; - void *backup_iv = NULL; int nr_sgs = 0; int nr_sgd = 0; int err = 0; @@ -134,30 +196,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { - rctx->ivlen = ivsize; - rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA); - if (!rctx->biv) { - err = -ENOMEM; + err = sun8i_ss_setup_ivs(areq); + if (err) goto theend_key; - } - if (rctx->op_dir & SS_DECRYPTION) { - backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!backup_iv) { - err = -ENOMEM; - goto theend_key; - } - offset = areq->cryptlen - ivsize; - scatterwalk_map_and_copy(backup_iv, areq->src, offset, - ivsize, 0); - } - memcpy(rctx->biv, areq->iv, ivsize); - rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen, - DMA_TO_DEVICE); - if (dma_mapping_error(ss->dev, rctx->p_iv)) { - dev_err(ss->dev, "Cannot DMA MAP IV\n"); - err = -ENOMEM; - goto theend_iv; - } } if (areq->src == areq->dst) { nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src), @@ -243,21 +284,19 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) } theend_iv: - if (rctx->p_iv) - dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen, - DMA_TO_DEVICE); - if (areq->iv && ivsize > 0) { - if (rctx->biv) { - offset = areq->cryptlen - ivsize; - if (rctx->op_dir & SS_DECRYPTION) { - memcpy(areq->iv, backup_iv, ivsize); - kfree_sensitive(backup_iv); - } else { - scatterwalk_map_and_copy(areq->iv, areq->dst, offset, - ivsize, 0); - } - kfree(rctx->biv); + for (i = 0; i < rctx->niv; i++) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); + memzero_explicit(sf->iv[i], ivsize); + } + + offset = areq->cryptlen - ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + memcpy(areq->iv, sf->biv, ivsize); + memzero_explicit(sf->biv, ivsize); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, offset, + ivsize, 0); } } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 319fe3279a71..657530578643 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -66,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx const char *name) { int flow = rctx->flow; + unsigned int ivlen = rctx->ivlen; u32 v = SS_START; int i; @@ -104,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx mutex_lock(&ss->mlock); writel(rctx->p_key, ss->base + SS_KEY_ADR_REG); - if (i == 0) { - if (rctx->p_iv) - writel(rctx->p_iv, ss->base + SS_IV_ADR_REG); - } else { - if (rctx->biv) { - if (rctx->op_dir == SS_ENCRYPTION) - writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + if (ivlen) { + if (rctx->op_dir == SS_ENCRYPTION) { + if (i == 0) + writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG); else - writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG); + } else { + writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG); } } @@ -464,7 +464,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i) */ static int allocate_flows(struct sun8i_ss_dev *ss) { - int i, err; + int i, j, err; ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow), GFP_KERNEL); @@ -474,6 +474,18 @@ static int allocate_flows(struct sun8i_ss_dev *ss) for (i = 0; i < MAXFLOW; i++) { init_completion(&ss->flows[i].complete); + ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].biv) + goto error_engine; + + for (j = 0; j < MAX_SG; j++) { + ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].iv[j]) + goto error_engine; + } + ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true); if (!ss->flows[i].engine) { dev_err(ss->dev, "Cannot allocate engine\n"); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 28188685b910..57ada8653855 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -121,11 +121,15 @@ struct sginfo { * @complete: completion for the current task on this flow * @status: set to 1 by interrupt if task is done * @stat_req: number of request done by this flow + * @iv: list of IV to use for each step + * @biv: buffer which contain the backuped IV */ struct sun8i_ss_flow { struct crypto_engine *engine; struct completion complete; int status; + u8 *iv[MAX_SG]; + u8 *biv; #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG unsigned long stat_req; #endif @@ -164,28 +168,28 @@ struct sun8i_ss_dev { * @t_src: list of mapped SGs with their size * @t_dst: list of mapped SGs with their size * @p_key: DMA address of the key - * @p_iv: DMA address of the IV + * @p_iv: DMA address of the IVs + * @niv: Number of IVs DMA mapped * @method: current algorithm for this request * @op_mode: op_mode for this request * @op_dir: direction (encrypt vs decrypt) for this request * @flow: the flow to use for this request - * @ivlen: size of biv + * @ivlen: size of IVs * @keylen: keylen for this request - * @biv: buffer which contain the IV * @fallback_req: request struct for invoking the fallback skcipher TFM */ struct sun8i_cipher_req_ctx { struct sginfo t_src[MAX_SG]; struct sginfo t_dst[MAX_SG]; u32 p_key; - u32 p_iv; + u32 p_iv[MAX_SG]; + int niv; u32 method; u32 op_mode; u32 op_dir; int flow; unsigned int ivlen; unsigned int keylen; - void *biv; struct skcipher_request fallback_req; // keep at the end }; From c149e4763d28bb4c0e5daae8a59f2c74e889f407 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:15 +0000 Subject: [PATCH 084/116] crypto: sun8i-ss - handle zero sized sg sun8i-ss does not handle well the possible zero sized sg. Fixes: d9b45418a917 ("crypto: sun8i-ss - support hash algorithms") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 1a71ed49d233..ca4f280af35d 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -380,13 +380,21 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) } len = areq->nbytes; - for_each_sg(areq->src, sg, nr_sgs, i) { + sg = areq->src; + i = 0; + while (len > 0 && sg) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } rctx->t_src[i].addr = sg_dma_address(sg); todo = min(len, sg_dma_len(sg)); rctx->t_src[i].len = todo / 4; len -= todo; rctx->t_dst[i].addr = addr_res; rctx->t_dst[i].len = digestsize / 4; + sg = sg_next(sg); + i++; } if (len > 0) { dev_err(ss->dev, "remaining len %d\n", len); From 46e2fcbcbed6b37228bad4a9397e7d60555b8c44 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:16 +0000 Subject: [PATCH 085/116] crypto: sun8i-ss - remove redundant test Some fallback tests were redundant with what sun8i_ss_hash_need_fallback() already do. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index ca4f280af35d..eaa0bbaf5581 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -288,21 +288,11 @@ int sun8i_ss_hash_digest(struct ahash_request *areq) struct sun8i_ss_alg_template *algt; struct sun8i_ss_dev *ss; struct crypto_engine *engine; - struct scatterlist *sg; - int nr_sgs, e, i; + int e; if (sun8i_ss_hash_need_fallback(areq)) return sun8i_ss_hash_digest_fb(areq); - nr_sgs = sg_nents(areq->src); - if (nr_sgs > MAX_SG - 1) - return sun8i_ss_hash_digest_fb(areq); - - for_each_sg(areq->src, sg, nr_sgs, i) { - if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32))) - return sun8i_ss_hash_digest_fb(areq); - } - algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash); ss = algt->ss; From d86e3f37a6d05d4946788904b8968eb6287601ae Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:17 +0000 Subject: [PATCH 086/116] crypto: sun8i-ss - test error before assigning The first thing we should do after dma_map_single() is to test the result. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index eaa0bbaf5581..49e2e947b36b 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -420,15 +420,15 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) } addr_pad = dma_map_single(ss->dev, pad, j * 4, DMA_TO_DEVICE); - rctx->t_src[i].addr = addr_pad; - rctx->t_src[i].len = j; - rctx->t_dst[i].addr = addr_res; - rctx->t_dst[i].len = digestsize / 4; if (dma_mapping_error(ss->dev, addr_pad)) { dev_err(ss->dev, "DMA error on padding SG\n"); err = -EINVAL; goto theend; } + rctx->t_src[i].addr = addr_pad; + rctx->t_src[i].len = j; + rctx->t_dst[i].addr = addr_res; + rctx->t_dst[i].len = digestsize / 4; err = sun8i_ss_run_hash_task(ss, rctx, crypto_tfm_alg_name(areq->base.tfm)); From 4d867bebdc3afa97e00f9b64a21a3c51ce7feecf Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:18 +0000 Subject: [PATCH 087/116] crypto: sun8i-ss - use sg_nents_for_len When testing with some large SG list, the sun8i-ss drivers always fallback even if it can handle it. So use sg_nents_for_len() which permits to see less SGs than needed. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ss/sun8i-ss-cipher.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 70e2e6e37389..c4cb1ab1eeaa 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -29,7 +29,8 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) if (areq->cryptlen == 0 || areq->cryptlen % 16) return true; - if (sg_nents(areq->src) > 8 || sg_nents(areq->dst) > 8) + if (sg_nents_for_len(areq->src, areq->cryptlen) > 8 || + sg_nents_for_len(areq->dst, areq->cryptlen) > 8) return true; sg = areq->src; @@ -169,6 +170,8 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) int nr_sgs = 0; int nr_sgd = 0; int err = 0; + int nsgs = sg_nents_for_len(areq->src, areq->cryptlen); + int nsgd = sg_nents_for_len(areq->dst, areq->cryptlen); int i; algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher); @@ -201,8 +204,7 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) goto theend_key; } if (areq->src == areq->dst) { - nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src), - DMA_BIDIRECTIONAL); + nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL); if (nr_sgs <= 0 || nr_sgs > 8) { dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; @@ -210,15 +212,13 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) } nr_sgd = nr_sgs; } else { - nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); + nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE); if (nr_sgs <= 0 || nr_sgs > 8) { dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; goto theend_iv; } - nr_sgd = dma_map_sg(ss->dev, areq->dst, sg_nents(areq->dst), - DMA_FROM_DEVICE); + nr_sgd = dma_map_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE); if (nr_sgd <= 0 || nr_sgd > 8) { dev_err(ss->dev, "Invalid sg number %d\n", nr_sgd); err = -EINVAL; @@ -274,13 +274,10 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) theend_sgs: if (areq->src == areq->dst) { - dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src), - DMA_BIDIRECTIONAL); + dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); - dma_unmap_sg(ss->dev, areq->dst, sg_nents(areq->dst), - DMA_FROM_DEVICE); + dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE); + dma_unmap_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE); } theend_iv: From 8eec4563f152981a441693fc97c5459843dc5e6e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:19 +0000 Subject: [PATCH 088/116] crypto: sun8i-ss - do not allocate memory when handling hash requests Instead of allocate memory on each requests, it is easier to pre-allocate buffers. This made error path easier. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 10 ++++++++++ drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 15 +++------------ drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 4 ++++ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 657530578643..786b6f5cf300 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -486,6 +486,16 @@ static int allocate_flows(struct sun8i_ss_dev *ss) goto error_engine; } + /* the padding could be up to two block. */ + ss->flows[i].pad = devm_kmalloc(ss->dev, SHA256_BLOCK_SIZE * 2, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].pad) + goto error_engine; + ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].result) + goto error_engine; + ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true); if (!ss->flows[i].engine) { dev_err(ss->dev, "Cannot allocate engine\n"); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 49e2e947b36b..9582ac450d08 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -332,18 +332,11 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) if (digestsize == SHA224_DIGEST_SIZE) digestsize = SHA256_DIGEST_SIZE; - /* the padding could be up to two block. */ - pad = kzalloc(algt->alg.hash.halg.base.cra_blocksize * 2, GFP_KERNEL | GFP_DMA); - if (!pad) - return -ENOMEM; + result = ss->flows[rctx->flow].result; + pad = ss->flows[rctx->flow].pad; + memset(pad, 0, algt->alg.hash.halg.base.cra_blocksize * 2); bf = (__le32 *)pad; - result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA); - if (!result) { - kfree(pad); - return -ENOMEM; - } - for (i = 0; i < MAX_SG; i++) { rctx->t_dst[i].addr = 0; rctx->t_dst[i].len = 0; @@ -439,8 +432,6 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) memcpy(areq->result, result, algt->alg.hash.halg.digestsize); theend: - kfree(pad); - kfree(result); local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 57ada8653855..eb82ee5345ae 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -123,6 +123,8 @@ struct sginfo { * @stat_req: number of request done by this flow * @iv: list of IV to use for each step * @biv: buffer which contain the backuped IV + * @pad: padding buffer for hash operations + * @result: buffer for storing the result of hash operations */ struct sun8i_ss_flow { struct crypto_engine *engine; @@ -130,6 +132,8 @@ struct sun8i_ss_flow { int status; u8 *iv[MAX_SG]; u8 *biv; + void *pad; + void *result; #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG unsigned long stat_req; #endif From db0c62bcd4809be50dc952bfade5adf4baf52023 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:20 +0000 Subject: [PATCH 089/116] crypto: sun8i-ss - do not zeroize all pad Instead of memset all pad buffer, it is faster to only put 0 where needed. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 9582ac450d08..53e5bfb99c93 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -319,7 +319,7 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) unsigned int len; u64 fill, min_fill, byte_count; void *pad, *result; - int j, i, todo; + int j, i, k, todo; __be64 *bebits; __le64 *lebits; dma_addr_t addr_res, addr_pad; @@ -334,7 +334,6 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) result = ss->flows[rctx->flow].result; pad = ss->flows[rctx->flow].pad; - memset(pad, 0, algt->alg.hash.halg.base.cra_blocksize * 2); bf = (__le32 *)pad; for (i = 0; i < MAX_SG; i++) { @@ -395,7 +394,10 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) if (fill < min_fill) fill += 64; + k = j; j += (fill - min_fill) / sizeof(u32); + for (; k < j; k++) + bf[k] = 0; switch (algt->ss_algo_id) { case SS_ID_HASH_MD5: From c35e523a8b6962012a589e4a41f56015c31a3397 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:21 +0000 Subject: [PATCH 090/116] crypto: sun8i-ss - handle requests if last block is not modulo 64 The current sun8i-ss handle only requests with all SG length being modulo 64. But the last SG could be always handled by copying it on the pad buffer. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 2 +- .../crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 35 ++++++++++++++----- drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 2 ++ 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 786b6f5cf300..8d31fd4968f3 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -487,7 +487,7 @@ static int allocate_flows(struct sun8i_ss_dev *ss) } /* the padding could be up to two block. */ - ss->flows[i].pad = devm_kmalloc(ss->dev, SHA256_BLOCK_SIZE * 2, + ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE, GFP_KERNEL | GFP_DMA); if (!ss->flows[i].pad) goto error_engine; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 53e5bfb99c93..1b44c1a115d6 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -262,6 +263,9 @@ static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq) if (areq->nbytes == 0) return true; + if (areq->nbytes >= MAX_PAD_SIZE - 64) + return true; + /* we need to reserve one SG for the padding one */ if (sg_nents(areq->src) > MAX_SG - 1) return true; @@ -270,10 +274,13 @@ static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq) /* SS can operate hash only on full block size * since SS support only MD5,sha1,sha224 and sha256, blocksize * is always 64 - * TODO: handle request if last SG is not len%64 - * but this will need to copy data on a new SG of size=64 */ - if (sg->length % 64 || !IS_ALIGNED(sg->offset, sizeof(u32))) + /* Only the last block could be bounced to the pad buffer */ + if (sg->length % 64 && sg_next(sg)) + return true; + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return true; + if (sg->length % 4) return true; sg = sg_next(sg); } @@ -361,6 +368,7 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) goto theend; } + j = 0; len = areq->nbytes; sg = areq->src; i = 0; @@ -369,12 +377,19 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) sg = sg_next(sg); continue; } - rctx->t_src[i].addr = sg_dma_address(sg); todo = min(len, sg_dma_len(sg)); - rctx->t_src[i].len = todo / 4; - len -= todo; - rctx->t_dst[i].addr = addr_res; - rctx->t_dst[i].len = digestsize / 4; + /* only the last SG could be with a size not modulo64 */ + if (todo % 64 == 0) { + rctx->t_src[i].addr = sg_dma_address(sg); + rctx->t_src[i].len = todo / 4; + rctx->t_dst[i].addr = addr_res; + rctx->t_dst[i].len = digestsize / 4; + len -= todo; + } else { + scatterwalk_map_and_copy(bf, sg, 0, todo, 0); + j += todo / 4; + len -= todo; + } sg = sg_next(sg); i++; } @@ -384,8 +399,10 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) goto theend; } + if (j > 0) + i--; + byte_count = areq->nbytes; - j = 0; bf[j++] = cpu_to_le32(0x80); fill = 64 - (byte_count % 64); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index eb82ee5345ae..2e3524654aca 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -82,6 +82,8 @@ #define PRNG_DATA_SIZE (160 / 8) #define PRNG_SEED_SIZE DIV_ROUND_UP(175, 8) +#define MAX_PAD_SIZE 4096 + /* * struct ss_clock - Describe clocks used by sun8i-ss * @name: Name of clock needed by this variant From f95f61d0b2f152c1ab7928ad57bff1ab7257657f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:22 +0000 Subject: [PATCH 091/116] crypto: sun8i-ss - rework debugging The "Fallback for xxx" message is annoying, remove it and store the information in the debugfs. In the same time, reports more fallback statistics. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ss/sun8i-ss-cipher.c | 41 ++++++++++++++----- .../crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 21 ++++++++++ .../crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 36 ++++++++++++---- drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 7 +++- 4 files changed, 83 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index c4cb1ab1eeaa..7f1940c6cc41 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -22,35 +22,54 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sun8i_ss_alg_template *algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher); struct scatterlist *in_sg = areq->src; struct scatterlist *out_sg = areq->dst; struct scatterlist *sg; - if (areq->cryptlen == 0 || areq->cryptlen % 16) + if (areq->cryptlen == 0 || areq->cryptlen % 16) { + algt->stat_fb_len++; return true; + } if (sg_nents_for_len(areq->src, areq->cryptlen) > 8 || - sg_nents_for_len(areq->dst, areq->cryptlen) > 8) + sg_nents_for_len(areq->dst, areq->cryptlen) > 8) { + algt->stat_fb_sgnum++; return true; + } sg = areq->src; while (sg) { - if ((sg->length % 16) != 0) + if ((sg->length % 16) != 0) { + algt->stat_fb_sglen++; return true; - if ((sg_dma_len(sg) % 16) != 0) + } + if ((sg_dma_len(sg) % 16) != 0) { + algt->stat_fb_sglen++; return true; - if (!IS_ALIGNED(sg->offset, 16)) + } + if (!IS_ALIGNED(sg->offset, 16)) { + algt->stat_fb_align++; return true; + } sg = sg_next(sg); } sg = areq->dst; while (sg) { - if ((sg->length % 16) != 0) + if ((sg->length % 16) != 0) { + algt->stat_fb_sglen++; return true; - if ((sg_dma_len(sg) % 16) != 0) + } + if ((sg_dma_len(sg) % 16) != 0) { + algt->stat_fb_sglen++; return true; - if (!IS_ALIGNED(sg->offset, 16)) + } + if (!IS_ALIGNED(sg->offset, 16)) { + algt->stat_fb_align++; return true; + } sg = sg_next(sg); } @@ -385,9 +404,9 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm) crypto_skcipher_reqsize(op->fallback_tfm); - dev_info(op->ss->dev, "Fallback for %s is %s\n", - crypto_tfm_alg_driver_name(&sktfm->base), - crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm))); + memcpy(algt->fbname, + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)), + CRYPTO_MAX_ALG_NAME); op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request; op->enginectx.op.prepare_request = NULL; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 8d31fd4968f3..f09de5737e8b 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -430,6 +430,17 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v) ss_algs[i].alg.skcipher.base.cra_driver_name, ss_algs[i].alg.skcipher.base.cra_name, ss_algs[i].stat_req, ss_algs[i].stat_fb); + + seq_printf(seq, "\tLast fallback is: %s\n", + ss_algs[i].fbname); + seq_printf(seq, "\tFallback due to length: %lu\n", + ss_algs[i].stat_fb_len); + seq_printf(seq, "\tFallback due to SG length: %lu\n", + ss_algs[i].stat_fb_sglen); + seq_printf(seq, "\tFallback due to alignment: %lu\n", + ss_algs[i].stat_fb_align); + seq_printf(seq, "\tFallback due to SG numbers: %lu\n", + ss_algs[i].stat_fb_sgnum); break; case CRYPTO_ALG_TYPE_RNG: seq_printf(seq, "%s %s reqs=%lu tsize=%lu\n", @@ -442,6 +453,16 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v) ss_algs[i].alg.hash.halg.base.cra_driver_name, ss_algs[i].alg.hash.halg.base.cra_name, ss_algs[i].stat_req, ss_algs[i].stat_fb); + seq_printf(seq, "\tLast fallback is: %s\n", + ss_algs[i].fbname); + seq_printf(seq, "\tFallback due to length: %lu\n", + ss_algs[i].stat_fb_len); + seq_printf(seq, "\tFallback due to SG length: %lu\n", + ss_algs[i].stat_fb_sglen); + seq_printf(seq, "\tFallback due to alignment: %lu\n", + ss_algs[i].stat_fb_align); + seq_printf(seq, "\tFallback due to SG numbers: %lu\n", + ss_algs[i].stat_fb_sgnum); break; } } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 1b44c1a115d6..cb510ec21ec4 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -51,9 +51,8 @@ int sun8i_ss_hash_crainit(struct crypto_tfm *tfm) sizeof(struct sun8i_ss_hash_reqctx) + crypto_ahash_reqsize(op->fallback_tfm)); - dev_info(op->ss->dev, "Fallback for %s is %s\n", - crypto_tfm_alg_driver_name(tfm), - crypto_tfm_alg_driver_name(&op->fallback_tfm->base)); + memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base), CRYPTO_MAX_ALG_NAME); + err = pm_runtime_get_sync(op->ss->dev); if (err < 0) goto error_pm; @@ -259,16 +258,29 @@ static int sun8i_ss_run_hash_task(struct sun8i_ss_dev *ss, static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct sun8i_ss_alg_template *algt; struct scatterlist *sg; - if (areq->nbytes == 0) + algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash); + + if (areq->nbytes == 0) { + algt->stat_fb_len++; return true; - if (areq->nbytes >= MAX_PAD_SIZE - 64) + } + + if (areq->nbytes >= MAX_PAD_SIZE - 64) { + algt->stat_fb_len++; return true; + } /* we need to reserve one SG for the padding one */ - if (sg_nents(areq->src) > MAX_SG - 1) + if (sg_nents(areq->src) > MAX_SG - 1) { + algt->stat_fb_sgnum++; return true; + } + sg = areq->src; while (sg) { /* SS can operate hash only on full block size @@ -276,12 +288,18 @@ static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq) * is always 64 */ /* Only the last block could be bounced to the pad buffer */ - if (sg->length % 64 && sg_next(sg)) + if (sg->length % 64 && sg_next(sg)) { + algt->stat_fb_sglen++; return true; - if (!IS_ALIGNED(sg->offset, sizeof(u32))) + } + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + algt->stat_fb_align++; return true; - if (sg->length % 4) + } + if (sg->length % 4) { + algt->stat_fb_sglen++; return true; + } sg = sg_next(sg); } return false; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 2e3524654aca..b56038de333b 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -279,11 +279,14 @@ struct sun8i_ss_alg_template { struct rng_alg rng; struct ahash_alg hash; } alg; -#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG unsigned long stat_req; unsigned long stat_fb; unsigned long stat_bytes; -#endif + unsigned long stat_fb_len; + unsigned long stat_fb_sglen; + unsigned long stat_fb_align; + unsigned long stat_fb_sgnum; + char fbname[CRYPTO_MAX_ALG_NAME]; }; int sun8i_ss_enqueue(struct crypto_async_request *areq, u32 type); From e76ee4db9eb8f4a423bf3350f4348449b4339073 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:23 +0000 Subject: [PATCH 092/116] crypto: sun8i-ss - Add function for handling hash padding Move all padding work to a dedicated function. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 87 ++++++++++++++----- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index cb510ec21ec4..0db1e8253667 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -328,6 +328,64 @@ int sun8i_ss_hash_digest(struct ahash_request *areq) return crypto_transfer_hash_request_to_engine(engine, areq); } +static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs) +{ + u64 fill, min_fill, j, k; + __be64 *bebits; + __le64 *lebits; + + j = padi; + buf[j++] = cpu_to_le32(0x80); + + if (bs == 64) { + fill = 64 - (byte_count % 64); + min_fill = 2 * sizeof(u32) + sizeof(u32); + } else { + fill = 128 - (byte_count % 128); + min_fill = 4 * sizeof(u32) + sizeof(u32); + } + + if (fill < min_fill) + fill += bs; + + k = j; + j += (fill - min_fill) / sizeof(u32); + if (j * 4 > bufsize) { + pr_err("%s OVERFLOW %llu\n", __func__, j); + return 0; + } + for (; k < j; k++) + buf[k] = 0; + + if (le) { + /* MD5 */ + lebits = (__le64 *)&buf[j]; + *lebits = cpu_to_le64(byte_count << 3); + j += 2; + } else { + if (bs == 64) { + /* sha1 sha224 sha256 */ + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count << 3); + j += 2; + } else { + /* sha384 sha512*/ + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count >> 61); + j += 2; + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count << 3); + j += 2; + } + } + if (j * 4 > bufsize) { + pr_err("%s OVERFLOW %llu\n", __func__, j); + return 0; + } + + return j; +} + /* sun8i_ss_hash_run - run an ahash request * Send the data of the request to the SS along with an extra SG with padding */ @@ -342,11 +400,9 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) struct scatterlist *sg; int nr_sgs, err, digestsize; unsigned int len; - u64 fill, min_fill, byte_count; + u64 byte_count; void *pad, *result; int j, i, k, todo; - __be64 *bebits; - __le64 *lebits; dma_addr_t addr_res, addr_pad; __le32 *bf; @@ -421,33 +477,20 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) i--; byte_count = areq->nbytes; - bf[j++] = cpu_to_le32(0x80); - - fill = 64 - (byte_count % 64); - min_fill = 3 * sizeof(u32); - - if (fill < min_fill) - fill += 64; - - k = j; - j += (fill - min_fill) / sizeof(u32); - for (; k < j; k++) - bf[k] = 0; - switch (algt->ss_algo_id) { case SS_ID_HASH_MD5: - lebits = (__le64 *)&bf[j]; - *lebits = cpu_to_le64(byte_count << 3); - j += 2; + j = hash_pad(bf, 4096, j, byte_count, true, bs); break; case SS_ID_HASH_SHA1: case SS_ID_HASH_SHA224: case SS_ID_HASH_SHA256: - bebits = (__be64 *)&bf[j]; - *bebits = cpu_to_be64(byte_count << 3); - j += 2; + j = hash_pad(bf, 4096, j, byte_count, false, bs); break; } + if (!j) { + err = -EINVAL; + goto theend; + } addr_pad = dma_map_single(ss->dev, pad, j * 4, DMA_TO_DEVICE); if (dma_mapping_error(ss->dev, addr_pad)) { From 801b7d572c0ad46b116730121e9d3beefb0c7004 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:24 +0000 Subject: [PATCH 093/116] crypto: sun8i-ss - add hmac(sha1) Even if sun8i-ss does not handle hmac(sha1) directly, we can provide one which use the already supported acceleration of sha1. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 31 +++ .../crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 204 +++++++++++++++++- drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 6 + 3 files changed, 233 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index f09de5737e8b..98593a0cff69 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -409,6 +409,37 @@ static struct sun8i_ss_alg_template ss_algs[] = { } } }, +{ .type = CRYPTO_ALG_TYPE_AHASH, + .ss_algo_id = SS_ID_HASH_SHA1, + .alg.hash = { + .init = sun8i_ss_hash_init, + .update = sun8i_ss_hash_update, + .final = sun8i_ss_hash_final, + .finup = sun8i_ss_hash_finup, + .digest = sun8i_ss_hash_digest, + .export = sun8i_ss_hash_export, + .import = sun8i_ss_hash_import, + .setkey = sun8i_ss_hmac_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "hmac-sha1-sun8i-ss", + .cra_priority = 300, + .cra_alignmask = 3, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx), + .cra_module = THIS_MODULE, + .cra_init = sun8i_ss_hash_crainit, + .cra_exit = sun8i_ss_hash_craexit, + } + } + } +}, #endif }; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 0db1e8253667..ac417a6b39e5 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -14,12 +14,99 @@ #include #include #include +#include #include #include #include #include #include "sun8i-ss.h" +static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key, + unsigned int keylen) +{ + struct crypto_shash *xtfm; + struct shash_desc *sdesc; + size_t len; + int ret = 0; + + xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK); + if (!xtfm) + return -ENOMEM; + + len = sizeof(*sdesc) + crypto_shash_descsize(xtfm); + sdesc = kmalloc(len, GFP_KERNEL); + if (!sdesc) { + ret = -ENOMEM; + goto err_hashkey_sdesc; + } + sdesc->tfm = xtfm; + + ret = crypto_shash_init(sdesc); + if (ret) { + dev_err(tfmctx->ss->dev, "shash init error ret=%d\n", ret); + goto err_hashkey; + } + ret = crypto_shash_finup(sdesc, key, keylen, tfmctx->key); + if (ret) + dev_err(tfmctx->ss->dev, "shash finup error\n"); +err_hashkey: + kfree(sdesc); +err_hashkey_sdesc: + crypto_free_shash(xtfm); + return ret; +} + +int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen) +{ + struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(ahash); + struct ahash_alg *alg = __crypto_ahash_alg(ahash->base.__crt_alg); + struct sun8i_ss_alg_template *algt; + int digestsize, i; + int bs = crypto_ahash_blocksize(ahash); + int ret; + + algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash); + digestsize = algt->alg.hash.halg.digestsize; + + if (keylen > bs) { + ret = sun8i_ss_hashkey(tfmctx, key, keylen); + if (ret) + return ret; + tfmctx->keylen = digestsize; + } else { + tfmctx->keylen = keylen; + memcpy(tfmctx->key, key, keylen); + } + + tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA); + if (!tfmctx->ipad) + return -ENOMEM; + tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA); + if (!tfmctx->opad) { + ret = -ENOMEM; + goto err_opad; + } + + memset(tfmctx->key + tfmctx->keylen, 0, bs - tfmctx->keylen); + memcpy(tfmctx->ipad, tfmctx->key, tfmctx->keylen); + memcpy(tfmctx->opad, tfmctx->key, tfmctx->keylen); + for (i = 0; i < bs; i++) { + tfmctx->ipad[i] ^= HMAC_IPAD_VALUE; + tfmctx->opad[i] ^= HMAC_OPAD_VALUE; + } + + ret = crypto_ahash_setkey(tfmctx->fallback_tfm, key, keylen); + if (!ret) + return 0; + + memzero_explicit(tfmctx->key, keylen); + kfree_sensitive(tfmctx->opad); +err_opad: + kfree_sensitive(tfmctx->ipad); + return ret; +} + int sun8i_ss_hash_crainit(struct crypto_tfm *tfm) { struct sun8i_ss_hash_tfm_ctx *op = crypto_tfm_ctx(tfm); @@ -67,6 +154,9 @@ void sun8i_ss_hash_craexit(struct crypto_tfm *tfm) { struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_tfm_ctx(tfm); + kfree_sensitive(tfmctx->ipad); + kfree_sensitive(tfmctx->opad); + crypto_free_ahash(tfmctx->fallback_tfm); pm_runtime_put_sync_suspend(tfmctx->ss->dev); } @@ -393,18 +483,26 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) { struct ahash_request *areq = container_of(breq, struct ahash_request, base); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm); struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq); struct sun8i_ss_alg_template *algt; struct sun8i_ss_dev *ss; struct scatterlist *sg; + int bs = crypto_ahash_blocksize(tfm); int nr_sgs, err, digestsize; unsigned int len; u64 byte_count; void *pad, *result; int j, i, k, todo; - dma_addr_t addr_res, addr_pad; + dma_addr_t addr_res, addr_pad, addr_xpad; __le32 *bf; + /* HMAC step: + * 0: normal hashing + * 1: IPAD + * 2: OPAD + */ + int hmac = 0; algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash); ss = algt->ss; @@ -439,7 +537,7 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ss->dev, addr_res)) { dev_err(ss->dev, "DMA map dest\n"); err = -EINVAL; - goto theend; + goto err_dma_result; } j = 0; @@ -476,7 +574,60 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) if (j > 0) i--; +retry: byte_count = areq->nbytes; + if (tfmctx->keylen && hmac == 0) { + hmac = 1; + /* shift all SG one slot up, to free slot 0 for IPAD */ + for (k = 6; k >= 0; k--) { + rctx->t_src[k + 1].addr = rctx->t_src[k].addr; + rctx->t_src[k + 1].len = rctx->t_src[k].len; + rctx->t_dst[k + 1].addr = rctx->t_dst[k].addr; + rctx->t_dst[k + 1].len = rctx->t_dst[k].len; + } + addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE); + if (dma_mapping_error(ss->dev, addr_xpad)) { + dev_err(ss->dev, "Fail to create DMA mapping of ipad\n"); + goto err_dma_xpad; + } + rctx->t_src[0].addr = addr_xpad; + rctx->t_src[0].len = bs / 4; + rctx->t_dst[0].addr = addr_res; + rctx->t_dst[0].len = digestsize / 4; + i++; + byte_count = areq->nbytes + bs; + } + if (tfmctx->keylen && hmac == 2) { + for (i = 0; i < MAX_SG; i++) { + rctx->t_src[i].addr = 0; + rctx->t_src[i].len = 0; + rctx->t_dst[i].addr = 0; + rctx->t_dst[i].len = 0; + } + + addr_res = dma_map_single(ss->dev, result, digestsize, DMA_FROM_DEVICE); + if (dma_mapping_error(ss->dev, addr_res)) { + dev_err(ss->dev, "Fail to create DMA mapping of result\n"); + err = -EINVAL; + goto err_dma_result; + } + addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE); + if (dma_mapping_error(ss->dev, addr_xpad)) { + dev_err(ss->dev, "Fail to create DMA mapping of opad\n"); + goto err_dma_xpad; + } + rctx->t_src[0].addr = addr_xpad; + rctx->t_src[0].len = bs / 4; + + memcpy(bf, result, digestsize); + j = digestsize / 4; + i = 1; + byte_count = digestsize + bs; + + rctx->t_dst[0].addr = addr_res; + rctx->t_dst[0].len = digestsize / 4; + } + switch (algt->ss_algo_id) { case SS_ID_HASH_MD5: j = hash_pad(bf, 4096, j, byte_count, true, bs); @@ -496,7 +647,7 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ss->dev, addr_pad)) { dev_err(ss->dev, "DMA error on padding SG\n"); err = -EINVAL; - goto theend; + goto err_dma_pad; } rctx->t_src[i].addr = addr_pad; rctx->t_src[i].len = j; @@ -505,12 +656,49 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) err = sun8i_ss_run_hash_task(ss, rctx, crypto_tfm_alg_name(areq->base.tfm)); - dma_unmap_single(ss->dev, addr_pad, j * 4, DMA_TO_DEVICE); - dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); - dma_unmap_single(ss->dev, addr_res, digestsize, DMA_FROM_DEVICE); + /* + * mini helper for checking dma map/unmap + * flow start for hmac = 0 (and HMAC = 1) + * HMAC = 0 + * MAP src + * MAP res + * + * retry: + * if hmac then hmac = 1 + * MAP xpad (ipad) + * if hmac == 2 + * MAP res + * MAP xpad (opad) + * MAP pad + * ACTION! + * UNMAP pad + * if hmac + * UNMAP xpad + * UNMAP res + * if hmac < 2 + * UNMAP SRC + * + * if hmac = 1 then hmac = 2 goto retry + */ - memcpy(areq->result, result, algt->alg.hash.halg.digestsize); + dma_unmap_single(ss->dev, addr_pad, j * 4, DMA_TO_DEVICE); + +err_dma_pad: + if (hmac > 0) + dma_unmap_single(ss->dev, addr_xpad, bs, DMA_TO_DEVICE); +err_dma_xpad: + dma_unmap_single(ss->dev, addr_res, digestsize, DMA_FROM_DEVICE); +err_dma_result: + if (hmac < 2) + dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src), + DMA_TO_DEVICE); + if (hmac == 1 && !err) { + hmac = 2; + goto retry; + } + + if (!err) + memcpy(areq->result, result, algt->alg.hash.halg.digestsize); theend: local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index b56038de333b..df6f08f6092f 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -239,6 +239,10 @@ struct sun8i_ss_hash_tfm_ctx { struct crypto_engine_ctx enginectx; struct crypto_ahash *fallback_tfm; struct sun8i_ss_dev *ss; + u8 *ipad; + u8 *opad; + u8 key[SHA256_BLOCK_SIZE]; + int keylen; }; /* @@ -319,3 +323,5 @@ int sun8i_ss_hash_update(struct ahash_request *areq); int sun8i_ss_hash_finup(struct ahash_request *areq); int sun8i_ss_hash_digest(struct ahash_request *areq); int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq); +int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen); From 2e5545acf6584c196b4f4e1c1eea017a48699926 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:25 +0000 Subject: [PATCH 094/116] crypto: sun8i-ss - do not fallback if cryptlen is less than sg length The sg length could be more than remaining data on it. So check the length requirement against the minimum between those two values. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ss/sun8i-ss-cipher.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 7f1940c6cc41..5bb950182026 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -28,6 +28,7 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) struct scatterlist *in_sg = areq->src; struct scatterlist *out_sg = areq->dst; struct scatterlist *sg; + unsigned int todo, len; if (areq->cryptlen == 0 || areq->cryptlen % 16) { algt->stat_fb_len++; @@ -40,13 +41,11 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) return true; } + len = areq->cryptlen; sg = areq->src; while (sg) { - if ((sg->length % 16) != 0) { - algt->stat_fb_sglen++; - return true; - } - if ((sg_dma_len(sg) % 16) != 0) { + todo = min(len, sg->length); + if ((todo % 16) != 0) { algt->stat_fb_sglen++; return true; } @@ -54,15 +53,14 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) algt->stat_fb_align++; return true; } + len -= todo; sg = sg_next(sg); } + len = areq->cryptlen; sg = areq->dst; while (sg) { - if ((sg->length % 16) != 0) { - algt->stat_fb_sglen++; - return true; - } - if ((sg_dma_len(sg) % 16) != 0) { + todo = min(len, sg->length); + if ((todo % 16) != 0) { algt->stat_fb_sglen++; return true; } @@ -70,6 +68,7 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq) algt->stat_fb_align++; return true; } + len -= todo; sg = sg_next(sg); } From 8a1714ad1a33f7dcdebabd67cbe7af423ddefe2b Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:26 +0000 Subject: [PATCH 095/116] crypto: sun8i-ce - Add function for handling hash padding Move all padding work to a dedicated function. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 95 +++++++++++++------ 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 859b7522faaa..1c82cd510c75 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -248,6 +248,64 @@ int sun8i_ce_hash_digest(struct ahash_request *areq) return crypto_transfer_hash_request_to_engine(engine, areq); } +static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs) +{ + u64 fill, min_fill, j, k; + __be64 *bebits; + __le64 *lebits; + + j = padi; + buf[j++] = cpu_to_le32(0x80); + + if (bs == 64) { + fill = 64 - (byte_count % 64); + min_fill = 2 * sizeof(u32) + sizeof(u32); + } else { + fill = 128 - (byte_count % 128); + min_fill = 4 * sizeof(u32) + sizeof(u32); + } + + if (fill < min_fill) + fill += bs; + + k = j; + j += (fill - min_fill) / sizeof(u32); + if (j * 4 > bufsize) { + pr_err("%s OVERFLOW %llu\n", __func__, j); + return 0; + } + for (; k < j; k++) + buf[k] = 0; + + if (le) { + /* MD5 */ + lebits = (__le64 *)&buf[j]; + *lebits = cpu_to_le64(byte_count << 3); + j += 2; + } else { + if (bs == 64) { + /* sha1 sha224 sha256 */ + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count << 3); + j += 2; + } else { + /* sha384 sha512*/ + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count >> 61); + j += 2; + bebits = (__be64 *)&buf[j]; + *bebits = cpu_to_be64(byte_count << 3); + j += 2; + } + } + if (j * 4 > bufsize) { + pr_err("%s OVERFLOW %llu\n", __func__, j); + return 0; + } + + return j; +} + int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) { struct ahash_request *areq = container_of(breq, struct ahash_request, base); @@ -266,10 +324,6 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) __le32 *bf; void *buf = NULL; int j, i, todo; - int nbw = 0; - u64 fill, min_fill; - __be64 *bebits; - __le64 *lebits; void *result = NULL; u64 bs; int digestsize; @@ -348,44 +402,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) byte_count = areq->nbytes; j = 0; - bf[j++] = cpu_to_le32(0x80); - - if (bs == 64) { - fill = 64 - (byte_count % 64); - min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32)); - } else { - fill = 128 - (byte_count % 128); - min_fill = 4 * sizeof(u32) + (nbw ? 0 : sizeof(u32)); - } - - if (fill < min_fill) - fill += bs; - - j += (fill - min_fill) / sizeof(u32); switch (algt->ce_algo_id) { case CE_ID_HASH_MD5: - lebits = (__le64 *)&bf[j]; - *lebits = cpu_to_le64(byte_count << 3); - j += 2; + j = hash_pad(bf, 2 * bs, j, byte_count, true, bs); break; case CE_ID_HASH_SHA1: case CE_ID_HASH_SHA224: case CE_ID_HASH_SHA256: - bebits = (__be64 *)&bf[j]; - *bebits = cpu_to_be64(byte_count << 3); - j += 2; + j = hash_pad(bf, 2 * bs, j, byte_count, false, bs); break; case CE_ID_HASH_SHA384: case CE_ID_HASH_SHA512: - bebits = (__be64 *)&bf[j]; - *bebits = cpu_to_be64(byte_count >> 61); - j += 2; - bebits = (__be64 *)&bf[j]; - *bebits = cpu_to_be64(byte_count << 3); - j += 2; + j = hash_pad(bf, 2 * bs, j, byte_count, false, bs); break; } + if (!j) { + err = -EINVAL; + goto theend; + } addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE); cet->t_src[i].addr = cpu_to_le32(addr_pad); From 6b8309faf0ca17c819d3c9a1a262467f9dc3bd58 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:27 +0000 Subject: [PATCH 096/116] crypto: sun8i-ce - use sg_nents_for_len When testing with some large SG list, the sun8i-ce drivers always fallback even if it can handle it. So use sg_nents_for_len() which permits to see less SGs than needed. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ce/sun8i-ce-cipher.c | 23 ++++++++----------- .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 10 ++++---- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 0b1ce58bdeb9..35ab71d3a82d 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -26,7 +26,8 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); struct scatterlist *sg; - if (sg_nents(areq->src) > MAX_SG || sg_nents(areq->dst) > MAX_SG) + if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG || + sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) return true; if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) @@ -94,6 +95,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req int nr_sgs = 0; int nr_sgd = 0; int err = 0; + int ns = sg_nents_for_len(areq->src, areq->cryptlen); + int nd = sg_nents_for_len(areq->dst, areq->cryptlen); algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher); @@ -169,8 +172,7 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req } if (areq->src == areq->dst) { - nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), - DMA_BIDIRECTIONAL); + nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL); if (nr_sgs <= 0 || nr_sgs > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; @@ -178,15 +180,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req } nr_sgd = nr_sgs; } else { - nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); + nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); if (nr_sgs <= 0 || nr_sgs > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; goto theend_iv; } - nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst), - DMA_FROM_DEVICE); + nr_sgd = dma_map_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); if (nr_sgd <= 0 || nr_sgd > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd); err = -EINVAL; @@ -231,14 +231,11 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req theend_sgs: if (areq->src == areq->dst) { - dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src), - DMA_BIDIRECTIONAL); + dma_unmap_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL); } else { if (nr_sgs > 0) - dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst), - DMA_FROM_DEVICE); + dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); + dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); } theend_iv: diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 1c82cd510c75..59e07eb5f058 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -204,7 +204,7 @@ static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq) if (areq->nbytes == 0) return true; /* we need to reserve one SG for padding one */ - if (sg_nents(areq->src) > MAX_SG - 1) + if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) return true; sg = areq->src; while (sg) { @@ -229,7 +229,7 @@ int sun8i_ce_hash_digest(struct ahash_request *areq) if (sun8i_ce_hash_need_fallback(areq)) return sun8i_ce_hash_digest_fb(areq); - nr_sgs = sg_nents(areq->src); + nr_sgs = sg_nents_for_len(areq->src, areq->nbytes); if (nr_sgs > MAX_SG - 1) return sun8i_ce_hash_digest_fb(areq); @@ -328,6 +328,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) u64 bs; int digestsize; dma_addr_t addr_res, addr_pad; + int ns = sg_nents_for_len(areq->src, areq->nbytes); algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash); ce = algt->ce; @@ -372,7 +373,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) cet->t_sym_ctl = 0; cet->t_asym_ctl = 0; - nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); + nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); if (nr_sgs <= 0 || nr_sgs > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; @@ -441,8 +442,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(areq->base.tfm)); dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src), - DMA_TO_DEVICE); + dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE); From aff388f7874653aea0b8087cfedec52336d2066a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:28 +0000 Subject: [PATCH 097/116] crypto: sun8i-ce - rework debugging The "Fallback for xxx" message is annoying, remove it and store the information in the debugfs. Let's add more precise fallback stats and display it better. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ce/sun8i-ce-cipher.c | 43 +++++++++++++++---- .../crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 34 +++++++++++++-- .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 27 +++++++++--- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 11 ++++- 4 files changed, 96 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 35ab71d3a82d..315a62e424d6 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -25,27 +25,54 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); struct scatterlist *sg; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct sun8i_ce_alg_template *algt; + + algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher); if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG || - sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) + sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) { + algt->stat_fb_maxsg++; return true; + } - if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) + if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) { + algt->stat_fb_leniv++; return true; + } - if (areq->cryptlen == 0 || areq->cryptlen % 16) + if (areq->cryptlen == 0) { + algt->stat_fb_len0++; return true; + } + + if (areq->cryptlen % 16) { + algt->stat_fb_mod16++; + return true; + } sg = areq->src; while (sg) { - if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32))) + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + algt->stat_fb_srcali++; return true; + } + if (sg->length % 4) { + algt->stat_fb_srclen++; + return true; + } sg = sg_next(sg); } sg = areq->dst; while (sg) { - if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32))) + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + algt->stat_fb_dstali++; return true; + } + if (sg->length % 4) { + algt->stat_fb_dstlen++; + return true; + } sg = sg_next(sg); } return false; @@ -384,9 +411,9 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm) sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) + crypto_skcipher_reqsize(op->fallback_tfm); - dev_info(op->ce->dev, "Fallback for %s is %s\n", - crypto_tfm_alg_driver_name(&sktfm->base), - crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm))); + memcpy(algt->fbname, + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)), + CRYPTO_MAX_ALG_NAME); op->enginectx.op.do_one_request = sun8i_ce_cipher_run; op->enginectx.op.prepare_request = sun8i_ce_cipher_prepare; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index eeaa856b8f81..9f6594699835 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -595,19 +595,47 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v) continue; switch (ce_algs[i].type) { case CRYPTO_ALG_TYPE_SKCIPHER: - seq_printf(seq, "%s %s %lu %lu\n", + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", ce_algs[i].alg.skcipher.base.cra_driver_name, ce_algs[i].alg.skcipher.base.cra_name, ce_algs[i].stat_req, ce_algs[i].stat_fb); + seq_printf(seq, "\tLast fallback is: %s\n", + ce_algs[i].fbname); + seq_printf(seq, "\tFallback due to 0 length: %lu\n", + ce_algs[i].stat_fb_len0); + seq_printf(seq, "\tFallback due to length !mod16: %lu\n", + ce_algs[i].stat_fb_mod16); + seq_printf(seq, "\tFallback due to length < IV: %lu\n", + ce_algs[i].stat_fb_leniv); + seq_printf(seq, "\tFallback due to source alignment: %lu\n", + ce_algs[i].stat_fb_srcali); + seq_printf(seq, "\tFallback due to dest alignment: %lu\n", + ce_algs[i].stat_fb_dstali); + seq_printf(seq, "\tFallback due to source length: %lu\n", + ce_algs[i].stat_fb_srclen); + seq_printf(seq, "\tFallback due to dest length: %lu\n", + ce_algs[i].stat_fb_dstlen); + seq_printf(seq, "\tFallback due to SG numbers: %lu\n", + ce_algs[i].stat_fb_maxsg); break; case CRYPTO_ALG_TYPE_AHASH: - seq_printf(seq, "%s %s %lu %lu\n", + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", ce_algs[i].alg.hash.halg.base.cra_driver_name, ce_algs[i].alg.hash.halg.base.cra_name, ce_algs[i].stat_req, ce_algs[i].stat_fb); + seq_printf(seq, "\tLast fallback is: %s\n", + ce_algs[i].fbname); + seq_printf(seq, "\tFallback due to 0 length: %lu\n", + ce_algs[i].stat_fb_len0); + seq_printf(seq, "\tFallback due to length: %lu\n", + ce_algs[i].stat_fb_srclen); + seq_printf(seq, "\tFallback due to alignment: %lu\n", + ce_algs[i].stat_fb_srcali); + seq_printf(seq, "\tFallback due to SG numbers: %lu\n", + ce_algs[i].stat_fb_maxsg); break; case CRYPTO_ALG_TYPE_RNG: - seq_printf(seq, "%s %s %lu %lu\n", + seq_printf(seq, "%s %s reqs=%lu bytes=%lu\n", ce_algs[i].alg.rng.base.cra_driver_name, ce_algs[i].alg.rng.base.cra_name, ce_algs[i].stat_req, ce_algs[i].stat_bytes); diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 59e07eb5f058..8b5b9b9d04c3 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -50,9 +50,9 @@ int sun8i_ce_hash_crainit(struct crypto_tfm *tfm) sizeof(struct sun8i_ce_hash_reqctx) + crypto_ahash_reqsize(op->fallback_tfm)); - dev_info(op->ce->dev, "Fallback for %s is %s\n", - crypto_tfm_alg_driver_name(tfm), - crypto_tfm_alg_driver_name(&op->fallback_tfm->base)); + memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base), + CRYPTO_MAX_ALG_NAME); + err = pm_runtime_get_sync(op->ce->dev); if (err < 0) goto error_pm; @@ -199,17 +199,32 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq) static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct sun8i_ce_alg_template *algt; struct scatterlist *sg; - if (areq->nbytes == 0) + algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash); + + if (areq->nbytes == 0) { + algt->stat_fb_len0++; return true; + } /* we need to reserve one SG for padding one */ - if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) + if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) { + algt->stat_fb_maxsg++; return true; + } sg = areq->src; while (sg) { - if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32))) + if (sg->length % 4) { + algt->stat_fb_srclen++; return true; + } + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + algt->stat_fb_srcali++; + return true; + } sg = sg_next(sg); } return false; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 23613a0ec9b0..8177aaba4434 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -333,11 +333,18 @@ struct sun8i_ce_alg_template { struct ahash_alg hash; struct rng_alg rng; } alg; -#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG unsigned long stat_req; unsigned long stat_fb; unsigned long stat_bytes; -#endif + unsigned long stat_fb_maxsg; + unsigned long stat_fb_leniv; + unsigned long stat_fb_len0; + unsigned long stat_fb_mod16; + unsigned long stat_fb_srcali; + unsigned long stat_fb_srclen; + unsigned long stat_fb_dstali; + unsigned long stat_fb_dstlen; + char fbname[CRYPTO_MAX_ALG_NAME]; }; int sun8i_ce_enqueue(struct crypto_async_request *areq, u32 type); From 42a01af3f13f09656af1f97d58e98415242eef45 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 2 May 2022 20:19:29 +0000 Subject: [PATCH 098/116] crypto: sun8i-ce - do not fallback if cryptlen is less than sg length The sg length could be more than remaining data on it. So check the length requirement against the minimum between those two values. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 315a62e424d6..74b4e910a38d 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -27,6 +27,7 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq) struct scatterlist *sg; struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct sun8i_ce_alg_template *algt; + unsigned int todo, len; algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher); @@ -51,28 +52,35 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq) return true; } + len = areq->cryptlen; sg = areq->src; while (sg) { if (!IS_ALIGNED(sg->offset, sizeof(u32))) { algt->stat_fb_srcali++; return true; } - if (sg->length % 4) { + todo = min(len, sg->length); + if (todo % 4) { algt->stat_fb_srclen++; return true; } + len -= todo; sg = sg_next(sg); } + + len = areq->cryptlen; sg = areq->dst; while (sg) { if (!IS_ALIGNED(sg->offset, sizeof(u32))) { algt->stat_fb_dstali++; return true; } - if (sg->length % 4) { + todo = min(len, sg->length); + if (todo % 4) { algt->stat_fb_dstlen++; return true; } + len -= todo; sg = sg_next(sg); } return false; From 91e8bcd7b4da182e09ea19a2c73167345fe14c98 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 May 2022 17:07:36 +0200 Subject: [PATCH 099/116] crypto: cryptd - Protect per-CPU resource by disabling BH. The access to cryptd_queue::cpu_queue is synchronized by disabling preemption in cryptd_enqueue_request() and disabling BH in cryptd_queue_worker(). This implies that access is allowed from BH. If cryptd_enqueue_request() is invoked from preemptible context _and_ soft interrupt then this can lead to list corruption since cryptd_enqueue_request() is not protected against access from soft interrupt. Replace get_cpu() in cryptd_enqueue_request() with local_bh_disable() to ensure BH is always disabled. Remove preempt_disable() from cryptd_queue_worker() since it is not needed because local_bh_disable() ensures synchronisation. Fixes: 254eff771441 ("crypto: cryptd - Per-CPU thread implementation...") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu --- crypto/cryptd.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index a1bea0f4baa8..668095eca0fa 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -39,6 +39,10 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { + /* + * Protected by disabling BH to allow enqueueing from softinterrupt and + * dequeuing from kworker (cryptd_queue_worker()). + */ struct cryptd_cpu_queue __percpu *cpu_queue; }; @@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { - int cpu, err; + int err; struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; - cpu = get_cpu(); + local_bh_disable(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); if (err == -ENOSPC) - goto out_put_cpu; + goto out; - queue_work_on(cpu, cryptd_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); if (!refcount_read(refcnt)) - goto out_put_cpu; + goto out; refcount_inc(refcnt); -out_put_cpu: - put_cpu(); +out: + local_bh_enable(); return err; } @@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. - * preempt_disable/enable is used to prevent being preempted by - * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent - * cryptd_enqueue_request() being accessed from software interrupts. */ local_bh_disable(); - preempt_disable(); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); local_bh_enable(); if (!req) From e0c77eb37ec770188c0074a9f646717a434c65c9 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Thu, 5 May 2022 02:20:24 +0000 Subject: [PATCH 100/116] crypto: octeontx2 - simplify the return expression of otx2_cpt_aead_cbc_aes_sha_setkey() Simplify the return expression. Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index f8f8542ce3e4..67530e90bbfe 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -896,7 +896,6 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, struct crypto_authenc_key_param *param; int enckeylen = 0, authkeylen = 0; struct rtattr *rta = (void *)key; - int status; if (!RTA_OK(rta, keylen)) return -EINVAL; @@ -938,11 +937,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, ctx->enc_key_len = enckeylen; ctx->auth_key_len = authkeylen; - status = aead_hmac_init(cipher); - if (status) - return status; - - return 0; + return aead_hmac_init(cipher); } static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, From 6ae7a8b193d353fe3a2a371b61ec4c8ecfcbb0a1 Mon Sep 17 00:00:00 2001 From: jianchunfu Date: Sun, 8 May 2022 13:22:50 +0800 Subject: [PATCH 101/116] crypto: talitos - Uniform coding style with defined variable Use the defined variable "desc" to uniform coding style. Signed-off-by: jianchunfu Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 25c9f825b8b5..c9ad6c213090 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1709,7 +1709,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, struct talitos_desc *desc2 = (struct talitos_desc *) (edesc->buf + edesc->dma_len); - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + unmap_single_talitos_ptr(dev, &desc->ptr[5], DMA_FROM_DEVICE); if (desc->next_desc && desc->ptr[5].ptr != desc2->ptr[5].ptr) unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE); @@ -1721,8 +1721,8 @@ static void common_nonsnoop_hash_unmap(struct device *dev, talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ - if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], + if (from_talitos_ptr_len(&desc->ptr[1], is_sec1)) + unmap_single_talitos_ptr(dev, &desc->ptr[1], DMA_TO_DEVICE); else if (desc->next_desc) unmap_single_talitos_ptr(dev, &desc2->ptr[1], @@ -1736,8 +1736,8 @@ static void common_nonsnoop_hash_unmap(struct device *dev, dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); - if (edesc->desc.next_desc) - dma_unmap_single(dev, be32_to_cpu(edesc->desc.next_desc), + if (desc->next_desc) + dma_unmap_single(dev, be32_to_cpu(desc->next_desc), TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL); } From 349d03ffd5f62c298fd667ffa397c3fdc5c6194b Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 May 2022 15:09:44 +0200 Subject: [PATCH 102/116] crypto: s390 - add crypto library interface for ChaCha20 Implement a crypto library interface for the s390-native ChaCha20 cipher algorithm. This allows us to stop to select CRYPTO_CHACHA20 and instead select CRYPTO_ARCH_HAVE_LIB_CHACHA. This allows BIG_KEYS=y not to build a whole ChaCha20 crypto infrastructure as a built-in, but build a smaller CRYPTO_LIB_CHACHA instead. Make CRYPTO_CHACHA_S390 config entry to look like similar ones on other architectures. Remove CRYPTO_ALGAPI select as anyway it is selected by CRYPTO_SKCIPHER. Add a new test module and a test script for ChaCha20 cipher and its interfaces. Here are test results on an idle z15 machine: Data | Generic crypto TFM | s390 crypto TFM | s390 lib size | enc dec | enc dec | enc dec -----+--------------------+------------------+---------------- 512b | 1545ns 1295ns | 604ns 446ns | 430ns 407ns 4k | 9536ns 9463ns | 2329ns 2174ns | 2170ns 2154ns 64k | 149.6us 149.3us | 34.4us 34.5us | 33.9us 33.1us 6M | 23.61ms 23.11ms | 4223us 4160us | 3951us 4008us 60M | 143.9ms 143.9ms | 33.5ms 33.2ms | 32.2ms 32.1ms Signed-off-by: Vladis Dronov Reviewed-by: Harald Freudenberger Signed-off-by: Herbert Xu --- arch/s390/crypto/chacha-glue.c | 34 +- drivers/crypto/Kconfig | 4 +- tools/testing/crypto/chacha20-s390/Makefile | 12 + .../testing/crypto/chacha20-s390/run-tests.sh | 34 ++ .../crypto/chacha20-s390/test-cipher.c | 372 ++++++++++++++++++ 5 files changed, 452 insertions(+), 4 deletions(-) create mode 100644 tools/testing/crypto/chacha20-s390/Makefile create mode 100644 tools/testing/crypto/chacha20-s390/run-tests.sh create mode 100644 tools/testing/crypto/chacha20-s390/test-cipher.c diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index ccfff73e2c93..2ec51f339cec 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -62,6 +62,34 @@ static int chacha20_s390(struct skcipher_request *req) return rc; } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + /* TODO: implement hchacha_block_arch() in assembly */ + hchacha_block_generic(state, stream, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20) + chacha_crypt_generic(state, dst, src, bytes, nrounds); + else + chacha20_crypt_s390(state, dst, src, bytes, + &state[4], &state[12]); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static struct skcipher_alg chacha_algs[] = { { .base.cra_name = "chacha20", @@ -83,12 +111,14 @@ static struct skcipher_alg chacha_algs[] = { static int __init chacha_mod_init(void) { - return crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); + return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? + crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0; } static void __exit chacha_mod_fini(void) { - crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) + crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); } module_cpu_feature_match(VXRS, chacha_mod_init); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 7b2d138bc83e..ee99c02c84e8 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -216,9 +216,9 @@ config CRYPTO_AES_S390 config CRYPTO_CHACHA_S390 tristate "ChaCha20 stream cipher" depends on S390 - select CRYPTO_ALGAPI select CRYPTO_SKCIPHER - select CRYPTO_CHACHA20 + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA help This is the s390 SIMD implementation of the ChaCha20 stream cipher (RFC 7539). diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile new file mode 100644 index 000000000000..db81cd2fb9c5 --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022 Red Hat, Inc. +# Author: Vladis Dronov + +obj-m += test_cipher.o +test_cipher-y := test-cipher.o + +all: + make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules +clean: + make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh new file mode 100644 index 000000000000..43108794b996 --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/run-tests.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022 Red Hat, Inc. +# Author: Vladis Dronov +# +# This script runs (via instmod) test-cipher.ko module which invokes +# generic and s390-native ChaCha20 encryprion algorithms with different +# size of data. Check 'dmesg' for results. +# +# The insmod error is expected: +# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted + +lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod +modprobe chacha_generic +modprobe chacha_s390 + +# run encryption for different data size, including whole block(s) +/- 1 +insmod test_cipher.ko size=63 +insmod test_cipher.ko size=64 +insmod test_cipher.ko size=65 +insmod test_cipher.ko size=127 +insmod test_cipher.ko size=128 +insmod test_cipher.ko size=129 +insmod test_cipher.ko size=511 +insmod test_cipher.ko size=512 +insmod test_cipher.ko size=513 +insmod test_cipher.ko size=4096 +insmod test_cipher.ko size=65611 +insmod test_cipher.ko size=6291456 +insmod test_cipher.ko size=62914560 + +# print test logs +dmesg | tail -170 diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c new file mode 100644 index 000000000000..34e8b855266f --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2022 Red Hat, Inc. + * Author: Vladis Dronov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int data_size __read_mostly = 256; +static unsigned int debug __read_mostly = 0; + +/* tie all skcipher structures together */ +struct skcipher_def { + struct scatterlist sginp, sgout; + struct crypto_skcipher *tfm; + struct skcipher_request *req; + struct crypto_wait wait; +}; + +/* Perform cipher operations with the chacha lib */ +static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) +{ + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 iv[16], key[32]; + u64 start, end; + + memset(key, 'X', sizeof(key)); + memset(iv, 'I', sizeof(iv)); + + if (debug) { + print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET, + 16, 1, key, 32, 1); + + print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET, + 16, 1, iv, 16, 1); + } + + /* Encrypt */ + chacha_init_arch(chacha_state, (u32*)key, iv); + + start = ktime_get_ns(); + chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); + end = ktime_get_ns(); + + + if (debug) + print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET, + 16, 1, cipher, + (data_size > 64 ? 64 : data_size), 1); + + pr_info("lib encryption took: %lld nsec", end - start); + + /* Decrypt */ + chacha_init_arch(chacha_state, (u32 *)key, iv); + + start = ktime_get_ns(); + chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); + end = ktime_get_ns(); + + if (debug) + print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET, + 16, 1, revert, + (data_size > 64 ? 64 : data_size), 1); + + pr_info("lib decryption took: %lld nsec", end - start); + + return 0; +} + +/* Perform cipher operations with skcipher */ +static unsigned int test_skcipher_encdec(struct skcipher_def *sk, + int enc) +{ + int rc; + + if (enc) { + rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req), + &sk->wait); + if (rc) + pr_info("skcipher encrypt returned with result" + "%d\n", rc); + } + else + { + rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req), + &sk->wait); + if (rc) + pr_info("skcipher decrypt returned with result" + "%d\n", rc); + } + + return rc; +} + +/* Initialize and trigger cipher operations */ +static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain) +{ + struct skcipher_def sk; + struct crypto_skcipher *skcipher = NULL; + struct skcipher_request *req = NULL; + u8 iv[16], key[32]; + u64 start, end; + int ret = -EFAULT; + + skcipher = crypto_alloc_skcipher(name, 0, 0); + if (IS_ERR(skcipher)) { + pr_info("could not allocate skcipher %s handle\n", name); + return PTR_ERR(skcipher); + } + + req = skcipher_request_alloc(skcipher, GFP_KERNEL); + if (!req) { + pr_info("could not allocate skcipher request\n"); + ret = -ENOMEM; + goto out; + } + + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, + &sk.wait); + + memset(key, 'X', sizeof(key)); + memset(iv, 'I', sizeof(iv)); + + if (crypto_skcipher_setkey(skcipher, key, 32)) { + pr_info("key could not be set\n"); + ret = -EAGAIN; + goto out; + } + + if (debug) { + print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET, + 16, 1, key, 32, 1); + + print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET, + 16, 1, iv, 16, 1); + } + + sk.tfm = skcipher; + sk.req = req; + + /* Encrypt in one pass */ + sg_init_one(&sk.sginp, plain, data_size); + sg_init_one(&sk.sgout, cipher, data_size); + skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout, + data_size, iv); + crypto_init_wait(&sk.wait); + + /* Encrypt data */ + start = ktime_get_ns(); + ret = test_skcipher_encdec(&sk, 1); + end = ktime_get_ns(); + + if (ret) + goto out; + + pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start); + + if (debug) + print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET, + 16, 1, cipher, + (data_size > 64 ? 64 : data_size), 1); + + /* Prepare for decryption */ + memset(iv, 'I', sizeof(iv)); + + sg_init_one(&sk.sginp, cipher, data_size); + sg_init_one(&sk.sgout, revert, data_size); + skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout, + data_size, iv); + crypto_init_wait(&sk.wait); + + /* Decrypt data */ + start = ktime_get_ns(); + ret = test_skcipher_encdec(&sk, 0); + end = ktime_get_ns(); + + if (ret) + goto out; + + pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start); + + if (debug) + print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET, + 16, 1, revert, + (data_size > 64 ? 64 : data_size), 1); + + /* Dump some internal skcipher data */ + if (debug) + pr_info("skcipher %s: cryptlen %d blksize %d stride %d " + "ivsize %d alignmask 0x%x\n", + name, sk.req->cryptlen, + crypto_skcipher_blocksize(sk.tfm), + crypto_skcipher_alg(sk.tfm)->walksize, + crypto_skcipher_ivsize(sk.tfm), + crypto_skcipher_alignmask(sk.tfm)); + +out: + if (skcipher) + crypto_free_skcipher(skcipher); + if (req) + skcipher_request_free(req); + return ret; +} + +static int __init chacha_s390_test_init(void) +{ + u8 *plain = NULL, *revert = NULL; + u8 *cipher_generic = NULL, *cipher_s390 = NULL; + int ret = -1; + + pr_info("s390 ChaCha20 test module: size=%d debug=%d\n", + data_size, debug); + + /* Allocate and fill buffers */ + plain = vmalloc(data_size); + if (!plain) { + pr_info("could not allocate plain buffer\n"); + ret = -2; + goto out; + } + memset(plain, 'a', data_size); + get_random_bytes(plain, (data_size > 256 ? 256 : data_size)); + + cipher_generic = vmalloc(data_size); + if (!cipher_generic) { + pr_info("could not allocate cipher_generic buffer\n"); + ret = -2; + goto out; + } + memset(cipher_generic, 0, data_size); + + cipher_s390 = vmalloc(data_size); + if (!cipher_s390) { + pr_info("could not allocate cipher_s390 buffer\n"); + ret = -2; + goto out; + } + memset(cipher_s390, 0, data_size); + + revert = vmalloc(data_size); + if (!revert) { + pr_info("could not allocate revert buffer\n"); + ret = -2; + goto out; + } + memset(revert, 0, data_size); + + if (debug) + print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET, + 16, 1, plain, + (data_size > 64 ? 64 : data_size), 1); + + /* Use chacha20 generic */ + ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain); + if (ret) + goto out; + + if (memcmp(plain, revert, data_size)) { + pr_info("generic en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("generic en/decryption check OK\n"); + + memset(revert, 0, data_size); + + /* Use chacha20 s390 */ + ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain); + if (ret) + goto out; + + if (memcmp(plain, revert, data_size)) { + pr_info("s390 en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("s390 en/decryption check OK\n"); + + if (memcmp(cipher_generic, cipher_s390, data_size)) { + pr_info("s390 vs generic check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("s390 vs generic check OK\n"); + + memset(cipher_s390, 0, data_size); + memset(revert, 0, data_size); + + /* Use chacha20 lib */ + test_lib_chacha(revert, cipher_s390, plain); + + if (memcmp(plain, revert, data_size)) { + pr_info("lib en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("lib en/decryption check OK\n"); + + if (memcmp(cipher_generic, cipher_s390, data_size)) { + pr_info("lib vs generic check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("lib vs generic check OK\n"); + + pr_info("--- chacha20 s390 test end ---\n"); + +out: + if (plain) + vfree(plain); + if (cipher_generic) + vfree(cipher_generic); + if (cipher_s390) + vfree(cipher_s390); + if (revert) + vfree(revert); + + return -1; +} + +static void __exit chacha_s390_test_exit(void) +{ + pr_info("s390 ChaCha20 test module exit\n"); +} + +module_param_named(size, data_size, uint, 0660); +module_param(debug, int, 0660); +MODULE_PARM_DESC(size, "Size of a plaintext"); +MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)"); + +module_init(chacha_s390_test_init); +module_exit(chacha_s390_test_exit); + +MODULE_DESCRIPTION("s390 ChaCha20 self-test"); +MODULE_AUTHOR("Vladis Dronov "); +MODULE_LICENSE("GPL v2"); From 1731160ff7c7bbb11bb1aacb14dd25e18d522779 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:19:27 +0100 Subject: [PATCH 103/116] crypto: qat - set to zero DH parameters before free Set to zero the context buffers containing the DH key before they are freed. This is a defense in depth measure that avoids keys to be recovered from memory in case the system is compromised between the free of the buffer and when that area of memory (containing keys) gets overwritten. Cc: stable@vger.kernel.org Fixes: c9839143ebbf ("crypto: qat - Add DH support") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index b0b78445418b..5633f9df3b6f 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -420,14 +420,17 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) { if (ctx->g) { + memset(ctx->g, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); ctx->g = NULL; } if (ctx->xa) { + memset(ctx->xa, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); ctx->xa = NULL; } if (ctx->p) { + memset(ctx->p, 0, ctx->p_size); dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); ctx->p = NULL; } From e0831e7af4e03f2715de102e18e9179ec0a81562 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:08 +0100 Subject: [PATCH 104/116] crypto: qat - use pre-allocated buffers in datapath In order to do DMAs, the QAT device requires that the scatterlist structures are mapped and translated into a format that the firmware can understand. This is defined as the composition of a scatter gather list (SGL) descriptor header, the struct qat_alg_buf_list, plus a variable number of flat buffer descriptors, the struct qat_alg_buf. The allocation and mapping of these data structures is done each time a request is received from the skcipher and aead APIs. In an OOM situation, this behaviour might lead to a dead-lock if an allocation fails. Based on the conversation in [1], increase the size of the aead and skcipher request contexts to include an SGL descriptor that can handle a maximum of 4 flat buffers. If requests exceed 4 entries buffers, memory is allocated dynamically. [1] https://lore.kernel.org/linux-crypto/20200722072932.GA27544@gondor.apana.org.au/ Cc: stable@vger.kernel.org Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface") Reported-by: Mikulas Patocka Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 64 +++++++++++++--------- drivers/crypto/qat/qat_common/qat_crypto.h | 24 ++++++++ 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index f998ed58457c..ec635fe44c1f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -46,19 +46,6 @@ static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -struct qat_alg_buf { - u32 len; - u32 resrvd; - u64 addr; -} __packed; - -struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; -} __packed __aligned(64); - /* Common content descriptor */ struct qat_alg_cd { union { @@ -693,7 +680,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, bl->bufers[i].len, DMA_BIDIRECTIONAL); dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - kfree(bl); + + if (!qat_req->buf.sgl_src_valid) + kfree(bl); + if (blp != blpout) { /* If out of place operation dma unmap only data */ int bufless = blout->num_bufs - blout->num_mapped_bufs; @@ -704,7 +694,9 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, DMA_BIDIRECTIONAL); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - kfree(blout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(blout); } } @@ -721,15 +713,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, dma_addr_t blp = DMA_MAPPING_ERROR; dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n + 1); + size_t sz_out, sz = struct_size(bufl, bufers, n); + int node = dev_to_node(&GET_DEV(inst->accel_dev)); if (unlikely(!n)) return -EINVAL; - bufl = kzalloc_node(sz, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!bufl)) - return -ENOMEM; + qat_req->buf.sgl_src_valid = false; + qat_req->buf.sgl_dst_valid = false; + + if (n > QAT_MAX_BUFF_DESC) { + bufl = kzalloc_node(sz, GFP_ATOMIC, node); + if (unlikely(!bufl)) + return -ENOMEM; + } else { + bufl = &qat_req->buf.sgl_src.sgl_hdr; + memset(bufl, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_src_valid = true; + } for_each_sg(sgl, sg, n, i) bufl->bufers[i].addr = DMA_MAPPING_ERROR; @@ -760,12 +761,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, struct qat_alg_buf *bufers; n = sg_nents(sglout); - sz_out = struct_size(buflout, bufers, n + 1); + sz_out = struct_size(buflout, bufers, n); sg_nctr = 0; - buflout = kzalloc_node(sz_out, GFP_ATOMIC, - dev_to_node(&GET_DEV(inst->accel_dev))); - if (unlikely(!buflout)) - goto err_in; + + if (n > QAT_MAX_BUFF_DESC) { + buflout = kzalloc_node(sz_out, GFP_ATOMIC, node); + if (unlikely(!buflout)) + goto err_in; + } else { + buflout = &qat_req->buf.sgl_dst.sgl_hdr; + memset(buflout, 0, sizeof(struct qat_alg_buf_list)); + qat_req->buf.sgl_dst_valid = true; + } bufers = buflout->bufers; for_each_sg(sglout, sg, n, i) @@ -810,7 +817,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, dma_unmap_single(dev, buflout->bufers[i].addr, buflout->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(buflout); + + if (!qat_req->buf.sgl_dst_valid) + kfree(buflout); err_in: if (!dma_mapping_error(dev, blp)) @@ -823,7 +832,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, bufl->bufers[i].len, DMA_BIDIRECTIONAL); - kfree(bufl); + if (!qat_req->buf.sgl_src_valid) + kfree(bufl); dev_err(dev, "Failed to map buf for dma\n"); return -ENOMEM; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index b6a4c95ae003..0928f159ea99 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -21,6 +21,26 @@ struct qat_crypto_instance { atomic_t refctr; }; +#define QAT_MAX_BUFF_DESC 4 + +struct qat_alg_buf { + u32 len; + u32 resrvd; + u64 addr; +} __packed; + +struct qat_alg_buf_list { + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + struct qat_alg_buf bufers[]; +} __packed; + +struct qat_alg_fixed_buf_list { + struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; +} __packed __aligned(64); + struct qat_crypto_request_buffs { struct qat_alg_buf_list *bl; dma_addr_t blp; @@ -28,6 +48,10 @@ struct qat_crypto_request_buffs { dma_addr_t bloutp; size_t sz; size_t sz_out; + bool sgl_src_valid; + bool sgl_dst_valid; + struct qat_alg_fixed_buf_list sgl_src; + struct qat_alg_fixed_buf_list sgl_dst; }; struct qat_crypto_request; From af88d3c109aa5edfaa11c9a26d9c0ff21ddf501c Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:09 +0100 Subject: [PATCH 105/116] crypto: qat - refactor submission logic All the algorithms in qat_algs.c and qat_asym_algs.c use the same pattern to submit messages to the HW queues. Move the submission loop to a new function, qat_alg_send_message(), and share it between the symmetric and the asymmetric algorithms. As part of this rework, since the number of retries before returning an error is inconsistent between the symmetric and asymmetric implementations, set it to a value that works for both (i.e. 20, was 10 in qat_algs.c and 100 in qat_asym_algs.c) In addition fix the return code reported when the HW queues are full. In that case return -ENOSPC instead of -EBUSY. Including stable in CC since (1) the error code returned if the HW queues are full is incorrect and (2) to facilitate the backport of the next fix "crypto: qat - add backlog mechanism". Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/Makefile | 1 + drivers/crypto/qat/qat_common/qat_algs.c | 68 +++++++++---------- drivers/crypto/qat/qat_common/qat_algs_send.c | 21 ++++++ drivers/crypto/qat/qat_common/qat_algs_send.h | 10 +++ drivers/crypto/qat/qat_common/qat_asym_algs.c | 50 +++++++++----- drivers/crypto/qat/qat_common/qat_crypto.h | 5 ++ 6 files changed, 101 insertions(+), 54 deletions(-) create mode 100644 drivers/crypto/qat/qat_common/qat_algs_send.c create mode 100644 drivers/crypto/qat/qat_common/qat_algs_send.h diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index f25a6c8edfc7..04f058acc4d3 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \ qat_crypto.o \ qat_algs.o \ qat_asym_algs.o \ + qat_algs_send.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index ec635fe44c1f..6017ae82c713 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -17,7 +17,7 @@ #include #include #include "adf_accel_devices.h" -#include "adf_transport.h" +#include "qat_algs_send.h" #include "adf_common_drv.h" #include "qat_crypto.h" #include "icp_qat_hw.h" @@ -939,6 +939,17 @@ void qat_alg_callback(void *resp) qat_req->cb(qat_resp, qat_req); } +static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req, + struct qat_crypto_instance *inst) +{ + struct qat_alg_req req; + + req.fw_req = (u32 *)&qat_req->req; + req.tx_ring = inst->sym_tx; + + return qat_alg_send_message(&req); +} + static int qat_alg_aead_dec(struct aead_request *areq) { struct crypto_aead *aead_tfm = crypto_aead_reqtfm(areq); @@ -949,7 +960,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) struct icp_qat_fw_la_auth_req_params *auth_param; struct icp_qat_fw_la_bulk_req *msg; int digst_size = crypto_aead_authsize(aead_tfm); - int ret, ctr = 0; + int ret; u32 cipher_len; cipher_len = areq->cryptlen - digst_size; @@ -975,15 +986,12 @@ static int qat_alg_aead_dec(struct aead_request *areq) auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param)); auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + cipher_param->cipher_length; - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_aead_enc(struct aead_request *areq) @@ -996,7 +1004,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) struct icp_qat_fw_la_auth_req_params *auth_param; struct icp_qat_fw_la_bulk_req *msg; u8 *iv = areq->iv; - int ret, ctr = 0; + int ret; if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; @@ -1023,15 +1031,11 @@ static int qat_alg_aead_enc(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + areq->cryptlen; - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx, @@ -1184,7 +1188,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - int ret, ctr = 0; + int ret; if (req->cryptlen == 0) return 0; @@ -1208,15 +1212,11 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req) @@ -1253,7 +1253,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_bulk_req *msg; - int ret, ctr = 0; + int ret; if (req->cryptlen == 0) return 0; @@ -1278,15 +1278,11 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); qat_alg_update_iv(qat_req); - do { - ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg); - } while (ret == -EAGAIN && ctr++ < 10); - - if (ret == -EAGAIN) { + ret = qat_alg_send_sym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); - return -EBUSY; - } - return -EINPROGRESS; + + return ret; } static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req) diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c new file mode 100644 index 000000000000..78f1bb8c26c0 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_algs_send.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_transport.h" +#include "qat_algs_send.h" +#include "qat_crypto.h" + +#define ADF_MAX_RETRIES 20 + +int qat_alg_send_message(struct qat_alg_req *req) +{ + int ret = 0, ctr = 0; + + do { + ret = adf_send_message(req->tx_ring, req->fw_req); + } while (ret == -EAGAIN && ctr++ < ADF_MAX_RETRIES); + + if (ret == -EAGAIN) + return -ENOSPC; + + return -EINPROGRESS; +} diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h new file mode 100644 index 000000000000..3fa685d0c293 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef QAT_ALGS_SEND_H +#define QAT_ALGS_SEND_H + +#include "qat_crypto.h" + +int qat_alg_send_message(struct qat_alg_req *req); + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 5633f9df3b6f..08b8d83e070a 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -12,6 +12,7 @@ #include #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" +#include "qat_algs_send.h" #include "adf_transport.h" #include "adf_common_drv.h" #include "qat_crypto.h" @@ -137,6 +138,17 @@ struct qat_asym_request { void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); +static int qat_alg_send_asym_message(struct qat_asym_request *qat_req, + struct qat_crypto_instance *inst) +{ + struct qat_alg_req req; + + req.fw_req = (u32 *)&qat_req->req; + req.tx_ring = inst->pke_tx; + + return qat_alg_send_message(&req); +} + static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) { struct qat_asym_request *req = (void *)(__force long)resp->opaque; @@ -213,7 +225,7 @@ static int qat_dh_compute_value(struct kpp_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(kpp_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; int n_input_params = 0; if (unlikely(!ctx->xa)) @@ -338,13 +350,13 @@ static int qat_dh_compute_value(struct kpp_request *req) msg->input_param_count = n_input_params; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; - if (!ret) - return -EINPROGRESS; + return ret; +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_dh_output_params), @@ -645,7 +657,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; if (unlikely(!ctx->n || !ctx->e)) return -EINVAL; @@ -735,13 +747,14 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.opaque = (u64)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); - if (!ret) - return -EINPROGRESS; + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; + return ret; + +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_rsa_output_params), @@ -779,7 +792,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret, ctr = 0; + int ret; if (unlikely(!ctx->n || !ctx->d)) return -EINVAL; @@ -887,13 +900,14 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->input_param_count = 3; msg->output_param_count = 1; - do { - ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg); - } while (ret == -EBUSY && ctr++ < 100); - if (!ret) - return -EINPROGRESS; + ret = qat_alg_send_asym_message(qat_req, ctx->inst); + if (ret == -ENOSPC) + goto unmap_all; + return ret; + +unmap_all: if (!dma_mapping_error(dev, qat_req->phy_out)) dma_unmap_single(dev, qat_req->phy_out, sizeof(struct qat_rsa_output_params), diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 0928f159ea99..0dcba6fc358c 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -9,6 +9,11 @@ #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +struct qat_alg_req { + u32 *fw_req; + struct adf_etr_ring_data *tx_ring; +}; + struct qat_crypto_instance { struct adf_etr_ring_data *sym_tx; struct adf_etr_ring_data *sym_rx; From 38682383973280e5be2802ba8a8d4a636d36cb19 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:10 +0100 Subject: [PATCH 106/116] crypto: qat - add backlog mechanism The implementations of the crypto algorithms (aead, skcipher, etc) in the QAT driver do not properly support requests with the CRYPTO_TFM_REQ_MAY_BACKLOG flag set. If the HW queue is full, the driver returns -EBUSY but does not enqueue the request. This can result in applications like dm-crypt waiting indefinitely for the completion of a request that was never submitted to the hardware. Fix this by adding a software backlog queue: if the ring buffer is more than eighty percent full, then the request is enqueued to a backlog list and the error code -EBUSY is returned back to the caller. Requests in the backlog queue are resubmitted at a later time, in the context of the callback of a previously submitted request. The request for which -EBUSY is returned is then marked as -EINPROGRESS once submitted to the HW queues. The submission loop inside the function qat_alg_send_message() has been modified to decide which submission policy to use based on the request flags. If the request does not have the CRYPTO_TFM_REQ_MAY_BACKLOG set, the previous behaviour has been preserved. Based on a patch by Vishnu Das Ramachandran Cc: stable@vger.kernel.org Fixes: d370cec32194 ("crypto: qat - Intel(R) QAT crypto interface") Reported-by: Mikulas Patocka Reported-by: Kyle Sanderson Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_transport.c | 11 +++ drivers/crypto/qat/qat_common/adf_transport.h | 1 + .../qat/qat_common/adf_transport_internal.h | 1 + drivers/crypto/qat/qat_common/qat_algs.c | 24 ++++--- drivers/crypto/qat/qat_common/qat_algs_send.c | 67 ++++++++++++++++++- drivers/crypto/qat/qat_common/qat_algs_send.h | 1 + drivers/crypto/qat/qat_common/qat_asym_algs.c | 23 ++++--- drivers/crypto/qat/qat_common/qat_crypto.c | 3 + drivers/crypto/qat/qat_common/qat_crypto.h | 10 +++ 9 files changed, 123 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 8ba28409fb74..630d0483c4e0 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -8,6 +8,9 @@ #include "adf_cfg.h" #include "adf_common_drv.h" +#define ADF_MAX_RING_THRESHOLD 80 +#define ADF_PERCENT(tot, percent) (((tot) * (percent)) / 100) + static inline u32 adf_modulo(u32 data, u32 shift) { u32 div = data >> shift; @@ -77,6 +80,11 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring) bank->irq_mask); } +bool adf_ring_nearly_full(struct adf_etr_ring_data *ring) +{ + return atomic_read(ring->inflights) > ring->threshold; +} + int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg) { struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev); @@ -217,6 +225,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, struct adf_etr_bank_data *bank; struct adf_etr_ring_data *ring; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; + int max_inflights; u32 ring_num; int ret; @@ -263,6 +272,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, ring->ring_size = adf_verify_ring_size(msg_size, num_msgs); ring->head = 0; ring->tail = 0; + max_inflights = ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size); + ring->threshold = ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD); atomic_set(ring->inflights, 0); ret = adf_init_ring(ring); if (ret) diff --git a/drivers/crypto/qat/qat_common/adf_transport.h b/drivers/crypto/qat/qat_common/adf_transport.h index 2c95f1697c76..e6ef6f9b7691 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.h +++ b/drivers/crypto/qat/qat_common/adf_transport.h @@ -14,6 +14,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, const char *ring_name, adf_callback_fn callback, int poll_mode, struct adf_etr_ring_data **ring_ptr); +bool adf_ring_nearly_full(struct adf_etr_ring_data *ring); int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg); void adf_remove_ring(struct adf_etr_ring_data *ring); #endif diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h index 501bcf0f1809..8b2c92ba7ca1 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_internal.h +++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h @@ -22,6 +22,7 @@ struct adf_etr_ring_data { spinlock_t lock; /* protects ring data struct */ u16 head; u16 tail; + u32 threshold; u8 ring_number; u8 ring_size; u8 msg_size; diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 6017ae82c713..873533dc43a7 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -935,19 +935,25 @@ void qat_alg_callback(void *resp) struct icp_qat_fw_la_resp *qat_resp = resp; struct qat_crypto_request *qat_req = (void *)(__force long)qat_resp->opaque_data; + struct qat_instance_backlog *backlog = qat_req->alg_req.backlog; qat_req->cb(qat_resp, qat_req); + + qat_alg_send_backlog(backlog); } static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req, - struct qat_crypto_instance *inst) + struct qat_crypto_instance *inst, + struct crypto_async_request *base) { - struct qat_alg_req req; + struct qat_alg_req *alg_req = &qat_req->alg_req; - req.fw_req = (u32 *)&qat_req->req; - req.tx_ring = inst->sym_tx; + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->sym_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; - return qat_alg_send_message(&req); + return qat_alg_send_message(alg_req); } static int qat_alg_aead_dec(struct aead_request *areq) @@ -987,7 +993,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + cipher_param->cipher_length; - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1031,7 +1037,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) auth_param->auth_off = 0; auth_param->auth_len = areq->assoclen + areq->cryptlen; - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1212,7 +1218,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); @@ -1278,7 +1284,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) qat_alg_set_req_iv(qat_req); qat_alg_update_iv(qat_req); - ret = qat_alg_send_sym_message(qat_req, ctx->inst); + ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) qat_alg_free_bufl(ctx->inst, qat_req); diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c index 78f1bb8c26c0..ff5b4347f783 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.c +++ b/drivers/crypto/qat/qat_common/qat_algs_send.c @@ -6,7 +6,7 @@ #define ADF_MAX_RETRIES 20 -int qat_alg_send_message(struct qat_alg_req *req) +static int qat_alg_send_message_retry(struct qat_alg_req *req) { int ret = 0, ctr = 0; @@ -19,3 +19,68 @@ int qat_alg_send_message(struct qat_alg_req *req) return -EINPROGRESS; } + +void qat_alg_send_backlog(struct qat_instance_backlog *backlog) +{ + struct qat_alg_req *req, *tmp; + + spin_lock_bh(&backlog->lock); + list_for_each_entry_safe(req, tmp, &backlog->list, list) { + if (adf_send_message(req->tx_ring, req->fw_req)) { + /* The HW ring is full. Do nothing. + * qat_alg_send_backlog() will be invoked again by + * another callback. + */ + break; + } + list_del(&req->list); + req->base->complete(req->base, -EINPROGRESS); + } + spin_unlock_bh(&backlog->lock); +} + +static void qat_alg_backlog_req(struct qat_alg_req *req, + struct qat_instance_backlog *backlog) +{ + INIT_LIST_HEAD(&req->list); + + spin_lock_bh(&backlog->lock); + list_add_tail(&req->list, &backlog->list); + spin_unlock_bh(&backlog->lock); +} + +static int qat_alg_send_message_maybacklog(struct qat_alg_req *req) +{ + struct qat_instance_backlog *backlog = req->backlog; + struct adf_etr_ring_data *tx_ring = req->tx_ring; + u32 *fw_req = req->fw_req; + + /* If any request is already backlogged, then add to backlog list */ + if (!list_empty(&backlog->list)) + goto enqueue; + + /* If ring is nearly full, then add to backlog list */ + if (adf_ring_nearly_full(tx_ring)) + goto enqueue; + + /* If adding request to HW ring fails, then add to backlog list */ + if (adf_send_message(tx_ring, fw_req)) + goto enqueue; + + return -EINPROGRESS; + +enqueue: + qat_alg_backlog_req(req, backlog); + + return -EBUSY; +} + +int qat_alg_send_message(struct qat_alg_req *req) +{ + u32 flags = req->base->flags; + + if (flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + return qat_alg_send_message_maybacklog(req); + else + return qat_alg_send_message_retry(req); +} diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h index 3fa685d0c293..5ce9f4f69d8f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.h +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -6,5 +6,6 @@ #include "qat_crypto.h" int qat_alg_send_message(struct qat_alg_req *req); +void qat_alg_send_backlog(struct qat_instance_backlog *backlog); #endif diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 08b8d83e070a..ff7249c093c9 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -136,17 +136,21 @@ struct qat_asym_request { } areq; int err; void (*cb)(struct icp_qat_fw_pke_resp *resp); + struct qat_alg_req alg_req; } __aligned(64); static int qat_alg_send_asym_message(struct qat_asym_request *qat_req, - struct qat_crypto_instance *inst) + struct qat_crypto_instance *inst, + struct crypto_async_request *base) { - struct qat_alg_req req; + struct qat_alg_req *alg_req = &qat_req->alg_req; - req.fw_req = (u32 *)&qat_req->req; - req.tx_ring = inst->pke_tx; + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->pke_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; - return qat_alg_send_message(&req); + return qat_alg_send_message(alg_req); } static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) @@ -350,7 +354,7 @@ static int qat_dh_compute_value(struct kpp_request *req) msg->input_param_count = n_input_params; msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; @@ -557,8 +561,11 @@ void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; struct qat_asym_request *areq = (void *)(__force long)resp->opaque; + struct qat_instance_backlog *backlog = areq->alg_req.backlog; areq->cb(resp); + + qat_alg_send_backlog(backlog); } #define PKE_RSA_EP_512 0x1c161b21 @@ -748,7 +755,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->input_param_count = 3; msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; @@ -901,7 +908,7 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->output_param_count = 1; - ret = qat_alg_send_asym_message(qat_req, ctx->inst); + ret = qat_alg_send_asym_message(qat_req, inst, &req->base); if (ret == -ENOSPC) goto unmap_all; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 67c9588e89df..80d905ed102e 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -353,6 +353,9 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) &inst->pke_rx); if (ret) goto err; + + INIT_LIST_HEAD(&inst->backlog.list); + spin_lock_init(&inst->backlog.lock); } return 0; err: diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 0dcba6fc358c..245b6d9a3650 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -9,9 +9,17 @@ #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" +struct qat_instance_backlog { + struct list_head list; + spinlock_t lock; /* protects backlog list */ +}; + struct qat_alg_req { u32 *fw_req; struct adf_etr_ring_data *tx_ring; + struct crypto_async_request *base; + struct list_head list; + struct qat_instance_backlog *backlog; }; struct qat_crypto_instance { @@ -24,6 +32,7 @@ struct qat_crypto_instance { unsigned long state; int id; atomic_t refctr; + struct qat_instance_backlog backlog; }; #define QAT_MAX_BUFF_DESC 4 @@ -82,6 +91,7 @@ struct qat_crypto_request { u8 iv[AES_BLOCK_SIZE]; }; bool encryption; + struct qat_alg_req alg_req; }; static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) From 80a52e1ee7757b742f96bfb0d58f0c14eb6583d0 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:11 +0100 Subject: [PATCH 107/116] crypto: qat - fix memory leak in RSA When an RSA key represented in form 2 (as defined in PKCS #1 V2.1) is used, some components of the private key persist even after the TFM is released. Replace the explicit calls to free the buffers in qat_rsa_exit_tfm() with a call to qat_rsa_clear_ctx() which frees all buffers referenced in the TFM context. Cc: stable@vger.kernel.org Fixes: 879f77e9071f ("crypto: qat - Add RSA CRT mode") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index ff7249c093c9..2bc02c75398e 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -1257,18 +1257,8 @@ static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); - if (ctx->n) - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - if (ctx->e) - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - } + qat_rsa_clear_ctx(dev, ctx); qat_crypto_put_instance(ctx->inst); - ctx->n = NULL; - ctx->e = NULL; - ctx->d = NULL; } static struct akcipher_alg rsa = { From 3dfaf0071ed74d7a9c6b3c9ea4df7a6f8e423c2a Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:12 +0100 Subject: [PATCH 108/116] crypto: qat - remove dma_free_coherent() for RSA After commit f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP"), if the algorithms are enabled, the driver crashes with a BUG_ON while executing vunmap() in the context of a tasklet. This is due to the fact that the function dma_free_coherent() cannot be called in an interrupt context (see Documentation/core-api/dma-api-howto.rst). The functions qat_rsa_enc() and qat_rsa_dec() allocate memory with dma_alloc_coherent() if the source or the destination buffers are made of multiple flat buffers or of a size that is not compatible with the hardware. This memory is then freed with dma_free_coherent() in the context of a tasklet invoked to handle the response for the corresponding request. Replace allocations with dma_alloc_coherent() in the functions qat_rsa_enc() and qat_rsa_dec() with kmalloc() + dma_map_single(). Cc: stable@vger.kernel.org Fixes: a990532023b9 ("crypto: qat - Add support for RSA algorithm") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 137 ++++++++---------- 1 file changed, 60 insertions(+), 77 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 2bc02c75398e..b31372bddb96 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -529,25 +529,22 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; - if (req->src_align) - dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, - req->in.rsa.enc.m); - else - dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, - DMA_TO_DEVICE); + kfree_sensitive(req->src_align); + + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, + DMA_TO_DEVICE); areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, - req->out.rsa.enc.c); - } else { - dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, - DMA_FROM_DEVICE); + kfree_sensitive(req->dst_align); } + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, + DMA_FROM_DEVICE); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); dma_unmap_single(dev, req->phy_out, @@ -664,6 +661,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + u8 *vaddr; int ret; if (unlikely(!ctx->n || !ctx->e)) @@ -701,40 +699,39 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), - req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->in.rsa.enc.m, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + vaddr = qat_req->src_align; } + + qat_req->in.rsa.enc.m = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) + goto unmap_src; + if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) - goto unmap_src; - + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->out.rsa.enc.c, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; - + vaddr = qat_req->dst_align; } + + qat_req->out.rsa.enc.c = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) + goto unmap_dst; + qat_req->in.rsa.in_tab[3] = 0; qat_req->out.rsa.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, @@ -772,21 +769,15 @@ static int qat_rsa_enc(struct akcipher_request *req) sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.rsa.enc.c); - else - if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) - dma_unmap_single(dev, qat_req->out.rsa.enc.c, - ctx->key_sz, DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: - if (qat_req->src_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.rsa.enc.m); - else - if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) - dma_unmap_single(dev, qat_req->in.rsa.enc.m, - ctx->key_sz, DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, ctx->key_sz, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); return ret; } @@ -799,6 +790,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + u8 *vaddr; int ret; if (unlikely(!ctx->n || !ctx->d)) @@ -846,40 +838,37 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), - req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->in.rsa.dec.c, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + vaddr = qat_req->src_align; } + + qat_req->in.rsa.dec.c = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) + goto unmap_src; + if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) - goto unmap_src; - + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz, - &qat_req->out.rsa.dec.m, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; - + vaddr = qat_req->dst_align; } + qat_req->out.rsa.dec.m = dma_map_single(dev, vaddr, ctx->key_sz, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) + goto unmap_dst; if (ctx->crt_mode) qat_req->in.rsa.in_tab[6] = 0; @@ -925,21 +914,15 @@ static int qat_rsa_dec(struct akcipher_request *req) sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.rsa.dec.m); - else - if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) - dma_unmap_single(dev, qat_req->out.rsa.dec.m, - ctx->key_sz, DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: - if (qat_req->src_align) - dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.rsa.dec.c); - else - if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) - dma_unmap_single(dev, qat_req->in.rsa.dec.c, - ctx->key_sz, DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, ctx->key_sz, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); return ret; } From 029aa4624a7fe35233bdd3d1354dc7be260380bf Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:13 +0100 Subject: [PATCH 109/116] crypto: qat - remove dma_free_coherent() for DH The functions qat_dh_compute_value() allocates memory with dma_alloc_coherent() if the source or the destination buffers are made of multiple flat buffers or of a size that is not compatible with the hardware. This memory is then freed with dma_free_coherent() in the context of a tasklet invoked to handle the response for the corresponding request. According to Documentation/core-api/dma-api-howto.rst, the function dma_free_coherent() cannot be called in an interrupt context. Replace allocations with dma_alloc_coherent() in the function qat_dh_compute_value() with kmalloc() + dma_map_single(). Cc: stable@vger.kernel.org Fixes: c9839143ebbf ("crypto: qat - Add DH support") Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 83 ++++++++----------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index b31372bddb96..25bbd22085c3 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -164,26 +164,21 @@ static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; if (areq->src) { - if (req->src_align) - dma_free_coherent(dev, req->ctx.dh->p_size, - req->src_align, req->in.dh.in.b); - else - dma_unmap_single(dev, req->in.dh.in.b, - req->ctx.dh->p_size, DMA_TO_DEVICE); + dma_unmap_single(dev, req->in.dh.in.b, req->ctx.dh->p_size, + DMA_TO_DEVICE); + kfree_sensitive(req->src_align); } areq->dst_len = req->ctx.dh->p_size; if (req->dst_align) { scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - - dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, - req->out.dh.r); - } else { - dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, - DMA_FROM_DEVICE); + kfree_sensitive(req->dst_align); } + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), DMA_TO_DEVICE); dma_unmap_single(dev, req->phy_out, @@ -231,6 +226,7 @@ static int qat_dh_compute_value(struct kpp_request *req) struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret; int n_input_params = 0; + u8 *vaddr; if (unlikely(!ctx->xa)) return -EINVAL; @@ -287,27 +283,24 @@ static int qat_dh_compute_value(struct kpp_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->p_size) { qat_req->src_align = NULL; - qat_req->in.dh.in.b = dma_map_single(dev, - sg_virt(req->src), - req->src_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, - qat_req->in.dh.in.b))) - return ret; - + vaddr = sg_virt(req->src); } else { int shift = ctx->p_size - req->src_len; - qat_req->src_align = dma_alloc_coherent(dev, - ctx->p_size, - &qat_req->in.dh.in.b, - GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->p_size, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; scatterwalk_map_and_copy(qat_req->src_align + shift, req->src, 0, req->src_len, 0); + + vaddr = qat_req->src_align; } + + qat_req->in.dh.in.b = dma_map_single(dev, vaddr, ctx->p_size, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->in.dh.in.b))) + goto unmap_src; } /* * dst can be of any size in valid range, but HW expects it to be the @@ -318,20 +311,18 @@ static int qat_dh_compute_value(struct kpp_request *req) */ if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { qat_req->dst_align = NULL; - qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) - goto unmap_src; - + vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = dma_alloc_coherent(dev, ctx->p_size, - &qat_req->out.dh.r, - GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->p_size, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; + + vaddr = qat_req->dst_align; } + qat_req->out.dh.r = dma_map_single(dev, vaddr, ctx->p_size, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_dst; qat_req->in.dh.in_tab[n_input_params] = 0; qat_req->out.dh.out_tab[1] = 0; @@ -371,23 +362,17 @@ static int qat_dh_compute_value(struct kpp_request *req) sizeof(struct qat_dh_input_params), DMA_TO_DEVICE); unmap_dst: - if (qat_req->dst_align) - dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, - qat_req->out.dh.r); - else - if (!dma_mapping_error(dev, qat_req->out.dh.r)) - dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); + kfree_sensitive(qat_req->dst_align); unmap_src: if (req->src) { - if (qat_req->src_align) - dma_free_coherent(dev, ctx->p_size, qat_req->src_align, - qat_req->in.dh.in.b); - else - if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) - dma_unmap_single(dev, qat_req->in.dh.in.b, - ctx->p_size, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + kfree_sensitive(qat_req->src_align); } return ret; } From 9714061423b8b24b8afb31b8eb4df977c63f19c4 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:14 +0100 Subject: [PATCH 110/116] crypto: qat - add param check for RSA Reject requests with a source buffer that is bigger than the size of the key. This is to prevent a possible integer underflow that might happen when copying the source scatterlist into a linear buffer. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 25bbd22085c3..947eeff181b4 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -656,6 +656,10 @@ static int qat_rsa_enc(struct akcipher_request *req) req->dst_len = ctx->key_sz; return -EOVERFLOW; } + + if (req->src_len > ctx->key_sz) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); @@ -785,6 +789,10 @@ static int qat_rsa_dec(struct akcipher_request *req) req->dst_len = ctx->key_sz; return -EOVERFLOW; } + + if (req->src_len > ctx->key_sz) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); From 2acbb8771f6ac82422886e63832ee7a0f4b1635b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:15 +0100 Subject: [PATCH 111/116] crypto: qat - add param check for DH Reject requests with a source buffer that is bigger than the size of the key. This is to prevent a possible integer underflow that might happen when copying the source scatterlist into a linear buffer. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 947eeff181b4..7173a2a0a484 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -235,6 +235,10 @@ static int qat_dh_compute_value(struct kpp_request *req) req->dst_len = ctx->p_size; return -EOVERFLOW; } + + if (req->src_len > ctx->p_size) + return -EINVAL; + memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); From 8fb203c65a795b96faa1836b5086a5d6eb5c5e99 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:16 +0100 Subject: [PATCH 112/116] crypto: qat - honor CRYPTO_TFM_REQ_MAY_SLEEP flag If a request has the flag CRYPTO_TFM_REQ_MAY_SLEEP set, allocate memory using the flag GFP_KERNEL otherwise use GFP_ATOMIC. Signed-off-by: Giovanni Cabiddu Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 19 ++++++++++++------- drivers/crypto/qat/qat_common/qat_asym_algs.c | 17 ++++++++++------- drivers/crypto/qat/qat_common/qat_crypto.h | 5 +++++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 873533dc43a7..148edbe379e3 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -703,7 +703,8 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, struct scatterlist *sgl, struct scatterlist *sglout, - struct qat_crypto_request *qat_req) + struct qat_crypto_request *qat_req, + gfp_t flags) { struct device *dev = &GET_DEV(inst->accel_dev); int i, sg_nctr = 0; @@ -723,7 +724,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, qat_req->buf.sgl_dst_valid = false; if (n > QAT_MAX_BUFF_DESC) { - bufl = kzalloc_node(sz, GFP_ATOMIC, node); + bufl = kzalloc_node(sz, flags, node); if (unlikely(!bufl)) return -ENOMEM; } else { @@ -765,7 +766,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, sg_nctr = 0; if (n > QAT_MAX_BUFF_DESC) { - buflout = kzalloc_node(sz_out, GFP_ATOMIC, node); + buflout = kzalloc_node(sz_out, flags, node); if (unlikely(!buflout)) goto err_in; } else { @@ -966,6 +967,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) struct icp_qat_fw_la_auth_req_params *auth_param; struct icp_qat_fw_la_bulk_req *msg; int digst_size = crypto_aead_authsize(aead_tfm); + gfp_t f = qat_algs_alloc_flags(&areq->base); int ret; u32 cipher_len; @@ -973,7 +975,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) if (cipher_len % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -1008,6 +1010,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) struct qat_crypto_request *qat_req = aead_request_ctx(areq); struct icp_qat_fw_la_cipher_req_params *cipher_param; struct icp_qat_fw_la_auth_req_params *auth_param; + gfp_t f = qat_algs_alloc_flags(&areq->base); struct icp_qat_fw_la_bulk_req *msg; u8 *iv = areq->iv; int ret; @@ -1015,7 +1018,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -1193,13 +1196,14 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm); struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; + gfp_t f = qat_algs_alloc_flags(&req->base); struct icp_qat_fw_la_bulk_req *msg; int ret; if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); if (unlikely(ret)) return ret; @@ -1258,13 +1262,14 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm); struct qat_crypto_request *qat_req = skcipher_request_ctx(req); struct icp_qat_fw_la_cipher_req_params *cipher_param; + gfp_t f = qat_algs_alloc_flags(&req->base); struct icp_qat_fw_la_bulk_req *msg; int ret; if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); if (unlikely(ret)) return ret; diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 7173a2a0a484..16d97db9ea15 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -224,9 +224,10 @@ static int qat_dh_compute_value(struct kpp_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(kpp_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; - int ret; + gfp_t flags = qat_algs_alloc_flags(&req->base); int n_input_params = 0; u8 *vaddr; + int ret; if (unlikely(!ctx->xa)) return -EINVAL; @@ -291,7 +292,7 @@ static int qat_dh_compute_value(struct kpp_request *req) } else { int shift = ctx->p_size - req->src_len; - qat_req->src_align = kzalloc(ctx->p_size, GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->p_size, flags); if (unlikely(!qat_req->src_align)) return ret; @@ -317,7 +318,7 @@ static int qat_dh_compute_value(struct kpp_request *req) qat_req->dst_align = NULL; vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = kzalloc(ctx->p_size, GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->p_size, flags); if (unlikely(!qat_req->dst_align)) goto unmap_src; @@ -650,6 +651,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + gfp_t flags = qat_algs_alloc_flags(&req->base); u8 *vaddr; int ret; @@ -696,7 +698,7 @@ static int qat_rsa_enc(struct akcipher_request *req) } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, flags); if (unlikely(!qat_req->src_align)) return ret; @@ -714,7 +716,7 @@ static int qat_rsa_enc(struct akcipher_request *req) qat_req->dst_align = NULL; vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, flags); if (unlikely(!qat_req->dst_align)) goto unmap_src; vaddr = qat_req->dst_align; @@ -783,6 +785,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; + gfp_t flags = qat_algs_alloc_flags(&req->base); u8 *vaddr; int ret; @@ -839,7 +842,7 @@ static int qat_rsa_dec(struct akcipher_request *req) } else { int shift = ctx->key_sz - req->src_len; - qat_req->src_align = kzalloc(ctx->key_sz, GFP_KERNEL); + qat_req->src_align = kzalloc(ctx->key_sz, flags); if (unlikely(!qat_req->src_align)) return ret; @@ -857,7 +860,7 @@ static int qat_rsa_dec(struct akcipher_request *req) qat_req->dst_align = NULL; vaddr = sg_virt(req->dst); } else { - qat_req->dst_align = kzalloc(ctx->key_sz, GFP_KERNEL); + qat_req->dst_align = kzalloc(ctx->key_sz, flags); if (unlikely(!qat_req->dst_align)) goto unmap_src; vaddr = qat_req->dst_align; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index 245b6d9a3650..df3c738ce323 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -109,4 +109,9 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) return true; } +static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) +{ + return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; +} + #endif From d09144745959bf7852ccafd73243dd7d1eaeb163 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 9 May 2022 14:34:17 +0100 Subject: [PATCH 113/116] crypto: qat - re-enable registration of algorithms Re-enable the registration of algorithms after fixes to (1) use pre-allocated buffers in the datapath and (2) support the CRYPTO_TFM_REQ_MAY_BACKLOG flag. This reverts commit 8893d27ffcaf6ec6267038a177cb87bcde4dd3de. Cc: stable@vger.kernel.org Signed-off-by: Giovanni Cabiddu Reviewed-by: Marco Chiappero Reviewed-by: Adam Guerin Reviewed-by: Wojciech Ziemba Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 7 ------- drivers/crypto/qat/qat_common/qat_crypto.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index fa4c350c1bf9..a6c78b9c730b 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -75,13 +75,6 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - /* Temporarily set the number of crypto instances to zero to avoid - * registering the crypto algorithms. - * This will be removed when the algorithms will support the - * CRYPTO_TFM_REQ_MAY_BACKLOG flag - */ - instances = 0; - for (i = 0; i < instances; i++) { val = i; bank = i * 2; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 80d905ed102e..9341d892533a 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -161,13 +161,6 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - /* Temporarily set the number of crypto instances to zero to avoid - * registering the crypto algorithms. - * This will be removed when the algorithms will support the - * CRYPTO_TFM_REQ_MAY_BACKLOG flag - */ - instances = 0; - for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); From fbdab61af2d02ebf9c015458c95e34b54dea9027 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Tue, 10 May 2022 17:54:19 +0100 Subject: [PATCH 114/116] crypto: qat - add support for 401xx devices QAT_401xx is a derivative of 4xxx. Add support for that device in the qat_4xxx driver by including the DIDs (both PF and VF), extending the probe and the firmware loader. Signed-off-by: Giovanni Cabiddu Reviewed-by: Srinivas Kerekare Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_drv.c | 1 + drivers/crypto/qat/qat_common/adf_accel_devices.h | 2 ++ drivers/crypto/qat/qat_common/qat_hal.c | 1 + drivers/crypto/qat/qat_common/qat_uclo.c | 1 + 4 files changed, 5 insertions(+) diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index a6c78b9c730b..181fa1c8b3c7 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -14,6 +14,7 @@ static const struct pci_device_id adf_pci_tbl[] = { { PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), }, + { PCI_VDEVICE(INTEL, ADF_401XX_PCI_DEVICE_ID), }, { } }; MODULE_DEVICE_TABLE(pci, adf_pci_tbl); diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index e927799a8e6c..ede6458c9dbf 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -19,6 +19,8 @@ #define ADF_4XXX_DEVICE_NAME "4xxx" #define ADF_4XXX_PCI_DEVICE_ID 0x4940 #define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941 +#define ADF_401XX_PCI_DEVICE_ID 0x4942 +#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943 #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C #define ADF_DEVICE_FUSECTL_MASK 0x80000000 diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index 4bfd8f3566f7..7bba35280dac 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c @@ -695,6 +695,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, handle->pci_dev = pci_info->pci_dev; switch (handle->pci_dev->device) { case ADF_4XXX_PCI_DEVICE_ID: + case ADF_401XX_PCI_DEVICE_ID: handle->chip_info->mmp_sram_size = 0; handle->chip_info->nn = false; handle->chip_info->lm2lm3 = true; diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c index 4b6f37d6e85b..0fe5a474aa45 100644 --- a/drivers/crypto/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/qat/qat_common/qat_uclo.c @@ -731,6 +731,7 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle) case PCI_DEVICE_ID_INTEL_QAT_C3XXX: return ICP_QAT_AC_C3XXX_DEV_TYPE; case ADF_4XXX_PCI_DEVICE_ID: + case ADF_401XX_PCI_DEVICE_ID: return ICP_QAT_AC_4XXX_A_DEV_TYPE; default: pr_err("QAT: unsupported device 0x%x\n", From 2d33f5771b513f7dfb819563d4b38b687f2a4982 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Fri, 13 May 2022 16:02:44 +0800 Subject: [PATCH 115/116] crypto: hisilicon/sec - delete the flag CRYPTO_ALG_ALLOCATES_MEMORY Should not to uses the CRYPTO_ALG_ALLOCATES_MEMORY in SEC2. The SEC2 driver uses the pre-allocated buffers, including the src sgl pool, dst sgl pool and other qp ctx resources. (e.g. IV buffer, mac buffer, key buffer). The SEC2 driver doesn't allocate memory during request processing. The driver only maps software sgl to allocated hardware sgl during I/O. So here is fix it. Signed-off-by: Kai Ye Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a91635c348b5..6eebe739893c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2113,7 +2113,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ .cra_flags = CRYPTO_ALG_ASYNC |\ - CRYPTO_ALG_ALLOCATES_MEMORY |\ CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ @@ -2366,7 +2365,6 @@ static int sec_aead_decrypt(struct aead_request *a_req) .cra_driver_name = "hisi_sec_"sec_cra_name,\ .cra_priority = SEC_PRIORITY,\ .cra_flags = CRYPTO_ALG_ASYNC |\ - CRYPTO_ALG_ALLOCATES_MEMORY |\ CRYPTO_ALG_NEED_FALLBACK,\ .cra_blocksize = blk_size,\ .cra_ctxsize = sizeof(struct sec_ctx),\ From e4e62bbc6aba49a5edb3156ec65f6698ff37d228 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 14 May 2022 16:42:41 +0800 Subject: [PATCH 116/116] hwrng: omap3-rom - fix using wrong clk_disable() in omap_rom_rng_runtime_resume() 'ddata->clk' is enabled by clk_prepare_enable(), it should be disabled by clk_disable_unprepare(). Fixes: 8d9d4bdc495f ("hwrng: omap3-rom - Use runtime PM instead of custom functions") Signed-off-by: Yang Yingliang Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap3-rom-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e0d77fa048fb..f06e4f95114f 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev) r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT); if (r != 0) { - clk_disable(ddata->clk); + clk_disable_unprepare(ddata->clk); dev_err(dev, "HW init failed: %d\n", r); return -EIO;