This update includes the following changes:

API:
 
 - Test in-place en/decryption with two sglists in testmgr.
 - Fix process vs. softirq race in cryptd.
 
 Algorithms:
 
 - Add arm64 acceleration for sm4.
 - Add s390 acceleration for chacha20.
 
 Drivers:
 
 - Add polarfire soc hwrng support in mpfs.
 - Add support for TI SoC AM62x in sa2ul.
 - Add support for ATSHA204 cryptochip in atmel-sha204a.
 - Add support for PRNG in caam.
 - Restore support for storage encryption in qat.
 - Restore support for storage encryption in hisilicon/sec.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEn51F/lCuNhUwmDeSxycdCkmxi6cFAmKQs9cACgkQxycdCkmx
 i6deOA//bwX9JvxI+SiwEK/1u5GX9VHtCpAa1rMOqhfl8UrBfo0516M/CeUDjW0J
 t1yoq0JUoIfYrEbSJqxXTnfG6+fJ1WsQtT3jd1/64nrwVk+w6OdMBTt48B9GF0R5
 ZjWG7zmjKZcspZqSwib/gxbehJ+IX7dYdUsrlUQq3q64qpQEqxTgqsfyiY3LP24N
 lng6weLudrFA5Xa8pVCmrCnOH3J7kPGA4iGqTGNV8Qx3ud9CUWSc8BT4VdqU8t2f
 opaYL3s9oKc+xtS4yrOnfV+Wa/A8K6AuBYeODFtLe41FSpGYgaPslcGqEGwAHNpL
 0HjqQdC+4auimGJxyVcef7QVMCpGqIfKqYu7sYXuNROylPjqMNa/DRL64csaDxDn
 WiheV9RSc1zfchxHC4IjnfwE7nNDVYnYrZ1awyvQ9xvAoh7bldiEe6k/UlWi3L0F
 nejJRFPXOSZ2GfJjrVNsv5lSWZCNWRBzOehN4D6EMJjEfM/G3/30Q0qwif39QWVj
 r1gYQWmZuCa9mL7enga1XavebQ6cLXggR4sTxEmV7Sta6AJ+RqNqOnrPecEF5Avr
 eSYQLxor+jvhaepcKhyDOF4dKGGJIWaEi00GC83yZ8hApVbfWoVh8Nfxmp8TUEzH
 UUJFvrFLNTBOwRoz3fIT57vaFxksQREZwlcQ77xVAeg8S+BOB4o=
 =oVRe
 -----END PGP SIGNATURE-----

Merge tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:

   - Test in-place en/decryption with two sglists in testmgr

   - Fix process vs softirq race in cryptd

  Algorithms:

   - Add arm64 acceleration for sm4

   - Add s390 acceleration for chacha20

  Drivers:

   - Add polarfire soc hwrng support in mpfs

   - Add support for TI SoC AM62x in sa2ul

   - Add support for ATSHA204 cryptochip in atmel-sha204a

   - Add support for PRNG in caam

   - Restore support for storage encryption in qat

   - Restore support for storage encryption in hisilicon/sec"
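
The arm64 SM4 work in this pull registers ordinary skcipher algorithms ("ecb(sm4)", "cbc(sm4)", "cfb(sm4)", "ctr(sm4)"), so existing crypto API users pick up the acceleration transparently. Below is a minimal sketch of such a caller; sm4_ctr_demo() and its parameters are hypothetical and not part of this series, only the crypto API calls themselves are real.

#include <crypto/skcipher.h>
#include <crypto/sm4.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Hypothetical caller: CTR-SM4 encrypt a scatterlist in place. */
static int sm4_ctr_demo(const u8 *key, u8 *iv, struct scatterlist *sg,
			unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* Highest-priority "ctr(sm4)" provider, e.g. ctr-sm4-ce on arm64. */
	tfm = crypto_alloc_skcipher("ctr(sm4)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, SM4_KEY_SIZE);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, sg, sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

Because the Crypto Extensions glue below registers with cra_priority 400 and the NEON fallback with 200, the fastest available implementation is selected automatically.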

* tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  hwrng: omap3-rom - fix using wrong clk_disable() in omap_rom_rng_runtime_resume()
  crypto: hisilicon/sec - delete the flag CRYPTO_ALG_ALLOCATES_MEMORY
  crypto: qat - add support for 401xx devices
  crypto: qat - re-enable registration of algorithms
  crypto: qat - honor CRYPTO_TFM_REQ_MAY_SLEEP flag
  crypto: qat - add param check for DH
  crypto: qat - add param check for RSA
  crypto: qat - remove dma_free_coherent() for DH
  crypto: qat - remove dma_free_coherent() for RSA
  crypto: qat - fix memory leak in RSA
  crypto: qat - add backlog mechanism
  crypto: qat - refactor submission logic
  crypto: qat - use pre-allocated buffers in datapath
  crypto: qat - set to zero DH parameters before free
  crypto: s390 - add crypto library interface for ChaCha20
  crypto: talitos - Uniform coding style with defined variable
  crypto: octeontx2 - simplify the return expression of otx2_cpt_aead_cbc_aes_sha_setkey()
  crypto: cryptd - Protect per-CPU resource by disabling BH.
  crypto: sun8i-ce - do not fallback if cryptlen is less than sg length
  crypto: sun8i-ce - rework debugging
  ...
Linus Torvalds committed 2022-05-27 18:06:49 -07:00
commit d075c0c1be
118 changed files with 5541 additions and 1065 deletions

View File

@@ -104,6 +104,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and has no other effect on HPRE.
What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: QM debug registers (regs) read from the hardware. This
node shows the changes in the QM register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: HPRE debug registers (regs) read from the hardware. This
node shows the changes in the register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
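
The diff_regs nodes added here (and the matching ones for hisi_sec2 and hisi_zip below) are plain read-only debugfs files, so user space only needs to read them to see which QM/HPRE registers changed since the last dump. A minimal sketch of a userspace helper, assuming debugfs is mounted at /sys/kernel/debug and the caller fills in the <bdf> part of the path; dump_diff_regs() is hypothetical, not something shipped with the driver.

#include <stdio.h>

/*
 * Print a HiSilicon accelerator debugfs node, e.g.
 * /sys/kernel/debug/hisi_hpre/<bdf>/qm/diff_regs (root required).
 */
static int dump_diff_regs(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return -1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}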

View File

@@ -84,6 +84,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and has no other effect on SEC.
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: QM debug registers (regs) read from the hardware. This
node shows the changes in the QM register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: SEC debug registers (regs) read from the hardware. This
node shows the changes in the register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org

View File

@@ -97,6 +97,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and has no other effect on ZIP.
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: QM debug registers (regs) read from the hardware. This
node shows the changes in the QM register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
Description: ZIP debug registers (regs) read from the hardware. This
node shows the changes in the register values and can
help users check which registers have changed.
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org

View File

@@ -0,0 +1,87 @@
What: /sys/bus/pci/devices/<BDF>/fused_part
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/fused_part file reports
whether the CPU or APU has been fused to prevent tampering.
0: Not fused
1: Fused
What: /sys/bus/pci/devices/<BDF>/debug_lock_on
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/debug_lock_on file reports
whether the AMD CPU or APU has been unlocked for debugging.
Possible values:
0: Not locked
1: Locked
What: /sys/bus/pci/devices/<BDF>/tsme_status
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/tsme_status file reports
the status of Transparent Secure Memory Encryption (TSME) on AMD systems.
Possible values:
0: Not active
1: Active
What: /sys/bus/pci/devices/<BDF>/anti_rollback_status
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/anti_rollback_status file reports
whether the PSP is enforcing rollback protection.
Possible values:
0: Not enforcing
1: Enforcing
What: /sys/bus/pci/devices/<BDF>/rpmc_production_enabled
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/rpmc_production_enabled file reports
whether Replay Protected Monotonic Counter support has been enabled.
Possible values:
0: Not enabled
1: Enabled
What: /sys/bus/pci/devices/<BDF>/rpmc_spirom_available
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/rpmc_spirom_available file reports
whether an SPI device that supports the Replay Protected Monotonic Counter
feature is installed on the system.
Possible values:
0: Not present
1: Present
What: /sys/bus/pci/devices/<BDF>/hsp_tpm_available
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/hsp_tpm_available file reports
whether the HSP TPM has been activated.
Possible values:
0: Not activated or not present
1: Activated
What: /sys/bus/pci/devices/<BDF>/rom_armor_enforced
Date: June 2022
KernelVersion: 5.19
Contact: mario.limonciello@amd.com
Description:
The /sys/bus/pci/devices/<BDF>/rom_armor_enforced file reports
whether RomArmor SPI protection is enforced.
Possible values:
0: Not enforced
1: Enforced
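
Each of these PSP attributes is a single 0/1 value in the device's sysfs directory. A minimal sketch of querying one from user space; psp_attr_enabled() and the example BDF are hypothetical, only the sysfs paths come from the ABI text above.

#include <stdio.h>

/*
 * Read a boolean PSP attribute, e.g.
 * psp_attr_enabled("0000:a4:00.1", "tsme_status")  (the BDF is an example).
 * Returns 1 if the attribute reads '1', 0 if it reads '0', -1 on error.
 */
static int psp_attr_enabled(const char *bdf, const char *attr)
{
	char path[128];
	char val[8] = "";
	FILE *f;

	snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/%s", bdf, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(val, sizeof(val), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return val[0] == '1';
}

The same helper works for any of the attributes listed here, since they all follow the 0/1 convention.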

View File

@@ -15,6 +15,7 @@ properties:
- ti,j721e-sa2ul
- ti,am654-sa2ul
- ti,am64-sa2ul
- ti,am62-sa3ul
reg:
maxItems: 1

View File

@@ -47,7 +47,9 @@ properties:
- at,24c08
# i2c trusted platform module (TPM)
- atmel,at97sc3204t
# i2c h/w symmetric crypto module
# ATSHA204 - i2c h/w symmetric crypto module
- atmel,atsha204
# ATSHA204A - i2c h/w symmetric crypto module
- atmel,atsha204a
# i2c h/w elliptic curve crypto module
- atmel,atecc508a

View File

@@ -45,13 +45,25 @@ config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_LIB_SM3
select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4
config CRYPTO_SM4_ARM64_CE_BLK
tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_SM4
config CRYPTO_SM4_ARM64_NEON_BLK
tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"

View File

@@ -20,9 +20,15 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o
sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

View File

@@ -0,0 +1,36 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/linkage.h>
#include <asm/assembler.h>
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
.set .Lv\b\().4s, \b
.endr
.macro sm4e, rd, rn
.inst 0xcec08400 | .L\rd | (.L\rn << 5)
.endm
/*
* void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
*/
.text
SYM_FUNC_START(sm4_ce_do_crypt)
ld1 {v8.4s}, [x2]
ld1 {v0.4s-v3.4s}, [x0], #64
CPU_LE( rev32 v8.16b, v8.16b )
ld1 {v4.4s-v7.4s}, [x0]
sm4e v8.4s, v0.4s
sm4e v8.4s, v1.4s
sm4e v8.4s, v2.4s
sm4e v8.4s, v3.4s
sm4e v8.4s, v4.4s
sm4e v8.4s, v5.4s
sm4e v8.4s, v6.4s
sm4e v8.4s, v7.4s
rev64 v8.4s, v8.4s
ext v8.16b, v8.16b, v8.16b, #8
CPU_LE( rev32 v8.16b, v8.16b )
st1 {v8.4s}, [x1]
ret
SYM_FUNC_END(sm4_ce_do_crypt)

View File

@@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/sm4.h>
#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/types.h>
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
kernel_neon_end();
}
}
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
kernel_neon_end();
}
}
static struct crypto_alg sm4_ce_alg = {
.cra_name = "sm4",
.cra_driver_name = "sm4-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
}
};
static int __init sm4_ce_mod_init(void)
{
return crypto_register_alg(&sm4_ce_alg);
}
static void __exit sm4_ce_mod_fini(void)
{
crypto_unregister_alg(&sm4_ce_alg);
}
module_cpu_feature_match(SM4, sm4_ce_mod_init);
module_exit(sm4_ce_mod_fini);

View File

@@ -1,36 +1,660 @@
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm for ARMv8 with Crypto Extensions
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
.set .Lv\b\().4s, \b
.endr
.arch armv8-a+crypto
.macro sm4e, rd, rn
.inst 0xcec08400 | .L\rd | (.L\rn << 5)
.endm
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31
.set .Lv\b\().4s, \b
.endr
/*
* void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
.macro sm4e, vd, vn
.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
.macro sm4ekey, vd, vn, vm
.inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
.endm
/* Register macros */
#define RTMP0 v16
#define RTMP1 v17
#define RTMP2 v18
#define RTMP3 v19
#define RIV v20
/* Helper macros. */
#define PREPARE \
ld1 {v24.16b-v27.16b}, [x0], #64; \
ld1 {v28.16b-v31.16b}, [x0];
#define SM4_CRYPT_BLK(b0) \
rev32 b0.16b, b0.16b; \
sm4e b0.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
rev32 b0.16b, b0.16b;
#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b2.4s, v24.4s; \
sm4e b3.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b2.4s, v25.4s; \
sm4e b3.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b2.4s, v26.4s; \
sm4e b3.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b2.4s, v27.4s; \
sm4e b3.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b2.4s, v28.4s; \
sm4e b3.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b2.4s, v29.4s; \
sm4e b3.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b2.4s, v30.4s; \
sm4e b3.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
sm4e b2.4s, v31.4s; \
sm4e b3.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
rev64 b2.4s, b2.4s; \
rev64 b3.4s, b3.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
ext b2.16b, b2.16b, b2.16b, #8; \
ext b3.16b, b3.16b, b3.16b, #8; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b;
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b2.4s, v24.4s; \
sm4e b3.4s, v24.4s; \
sm4e b4.4s, v24.4s; \
sm4e b5.4s, v24.4s; \
sm4e b6.4s, v24.4s; \
sm4e b7.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b2.4s, v25.4s; \
sm4e b3.4s, v25.4s; \
sm4e b4.4s, v25.4s; \
sm4e b5.4s, v25.4s; \
sm4e b6.4s, v25.4s; \
sm4e b7.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b2.4s, v26.4s; \
sm4e b3.4s, v26.4s; \
sm4e b4.4s, v26.4s; \
sm4e b5.4s, v26.4s; \
sm4e b6.4s, v26.4s; \
sm4e b7.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b2.4s, v27.4s; \
sm4e b3.4s, v27.4s; \
sm4e b4.4s, v27.4s; \
sm4e b5.4s, v27.4s; \
sm4e b6.4s, v27.4s; \
sm4e b7.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b2.4s, v28.4s; \
sm4e b3.4s, v28.4s; \
sm4e b4.4s, v28.4s; \
sm4e b5.4s, v28.4s; \
sm4e b6.4s, v28.4s; \
sm4e b7.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b2.4s, v29.4s; \
sm4e b3.4s, v29.4s; \
sm4e b4.4s, v29.4s; \
sm4e b5.4s, v29.4s; \
sm4e b6.4s, v29.4s; \
sm4e b7.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b2.4s, v30.4s; \
sm4e b3.4s, v30.4s; \
sm4e b4.4s, v30.4s; \
sm4e b5.4s, v30.4s; \
sm4e b6.4s, v30.4s; \
sm4e b7.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
sm4e b2.4s, v31.4s; \
sm4e b3.4s, v31.4s; \
sm4e b4.4s, v31.4s; \
sm4e b5.4s, v31.4s; \
sm4e b6.4s, v31.4s; \
sm4e b7.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
rev64 b2.4s, b2.4s; \
rev64 b3.4s, b3.4s; \
rev64 b4.4s, b4.4s; \
rev64 b5.4s, b5.4s; \
rev64 b6.4s, b6.4s; \
rev64 b7.4s, b7.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
ext b2.16b, b2.16b, b2.16b, #8; \
ext b3.16b, b3.16b, b3.16b, #8; \
ext b4.16b, b4.16b, b4.16b, #8; \
ext b5.16b, b5.16b, b5.16b, #8; \
ext b6.16b, b6.16b, b6.16b, #8; \
ext b7.16b, b7.16b, b7.16b, #8; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b;
.align 3
SYM_FUNC_START(sm4_ce_expand_key)
/* input:
* x0: 128-bit key
* x1: rkey_enc
* x2: rkey_dec
* x3: fk array
* x4: ck array
*/
.text
SYM_FUNC_START(sm4_ce_do_crypt)
ld1 {v8.4s}, [x2]
ld1 {v0.4s-v3.4s}, [x0], #64
CPU_LE( rev32 v8.16b, v8.16b )
ld1 {v4.4s-v7.4s}, [x0]
sm4e v8.4s, v0.4s
sm4e v8.4s, v1.4s
sm4e v8.4s, v2.4s
sm4e v8.4s, v3.4s
sm4e v8.4s, v4.4s
sm4e v8.4s, v5.4s
sm4e v8.4s, v6.4s
sm4e v8.4s, v7.4s
rev64 v8.4s, v8.4s
ext v8.16b, v8.16b, v8.16b, #8
CPU_LE( rev32 v8.16b, v8.16b )
st1 {v8.4s}, [x1]
ret
SYM_FUNC_END(sm4_ce_do_crypt)
ld1 {v0.16b}, [x0];
rev32 v0.16b, v0.16b;
ld1 {v1.16b}, [x3];
/* load ck */
ld1 {v24.16b-v27.16b}, [x4], #64;
ld1 {v28.16b-v31.16b}, [x4];
/* input ^ fk */
eor v0.16b, v0.16b, v1.16b;
sm4ekey v0.4s, v0.4s, v24.4s;
sm4ekey v1.4s, v0.4s, v25.4s;
sm4ekey v2.4s, v1.4s, v26.4s;
sm4ekey v3.4s, v2.4s, v27.4s;
sm4ekey v4.4s, v3.4s, v28.4s;
sm4ekey v5.4s, v4.4s, v29.4s;
sm4ekey v6.4s, v5.4s, v30.4s;
sm4ekey v7.4s, v6.4s, v31.4s;
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1];
rev64 v7.4s, v7.4s;
rev64 v6.4s, v6.4s;
rev64 v5.4s, v5.4s;
rev64 v4.4s, v4.4s;
rev64 v3.4s, v3.4s;
rev64 v2.4s, v2.4s;
rev64 v1.4s, v1.4s;
rev64 v0.4s, v0.4s;
ext v7.16b, v7.16b, v7.16b, #8;
ext v6.16b, v6.16b, v6.16b, #8;
ext v5.16b, v5.16b, v5.16b, #8;
ext v4.16b, v4.16b, v4.16b, #8;
ext v3.16b, v3.16b, v3.16b, #8;
ext v2.16b, v2.16b, v2.16b, #8;
ext v1.16b, v1.16b, v1.16b, #8;
ext v0.16b, v0.16b, v0.16b, #8;
st1 {v7.16b}, [x2], #16;
st1 {v6.16b}, [x2], #16;
st1 {v5.16b}, [x2], #16;
st1 {v4.16b}, [x2], #16;
st1 {v3.16b}, [x2], #16;
st1 {v2.16b}, [x2], #16;
st1 {v1.16b}, [x2], #16;
st1 {v0.16b}, [x2];
ret;
SYM_FUNC_END(sm4_ce_expand_key)
.align 3
SYM_FUNC_START(sm4_ce_crypt_block)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
*/
PREPARE;
ld1 {v0.16b}, [x2];
SM4_CRYPT_BLK(v0);
st1 {v0.16b}, [x1];
ret;
SYM_FUNC_END(sm4_ce_crypt_block)
.align 3
SYM_FUNC_START(sm4_ce_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: nblocks
*/
PREPARE;
.Lcrypt_loop_blk:
sub w3, w3, #8;
tbnz w3, #31, .Lcrypt_tail8;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2], #64;
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1], #64;
cbz w3, .Lcrypt_end;
b .Lcrypt_loop_blk;
.Lcrypt_tail8:
add w3, w3, #8;
cmp w3, #4;
blt .Lcrypt_tail4;
sub w3, w3, #4;
ld1 {v0.16b-v3.16b}, [x2], #64;
SM4_CRYPT_BLK4(v0, v1, v2, v3);
st1 {v0.16b-v3.16b}, [x1], #64;
cbz w3, .Lcrypt_end;
.Lcrypt_tail4:
sub w3, w3, #1;
ld1 {v0.16b}, [x2], #16;
SM4_CRYPT_BLK(v0);
st1 {v0.16b}, [x1], #16;
cbnz w3, .Lcrypt_tail4;
.Lcrypt_end:
ret;
SYM_FUNC_END(sm4_ce_crypt)
.align 3
SYM_FUNC_START(sm4_ce_cbc_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
PREPARE;
ld1 {RIV.16b}, [x3];
.Lcbc_enc_loop:
sub w4, w4, #1;
ld1 {RTMP0.16b}, [x2], #16;
eor RIV.16b, RIV.16b, RTMP0.16b;
SM4_CRYPT_BLK(RIV);
st1 {RIV.16b}, [x1], #16;
cbnz w4, .Lcbc_enc_loop;
/* store new IV */
st1 {RIV.16b}, [x3];
ret;
SYM_FUNC_END(sm4_ce_cbc_enc)
.align 3
SYM_FUNC_START(sm4_ce_cbc_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
PREPARE;
ld1 {RIV.16b}, [x3];
.Lcbc_loop_blk:
sub w4, w4, #8;
tbnz w4, #31, .Lcbc_tail8;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2];
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
sub x2, x2, #64;
eor v0.16b, v0.16b, RIV.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v1.16b, v1.16b, RTMP0.16b;
eor v2.16b, v2.16b, RTMP1.16b;
eor v3.16b, v3.16b, RTMP2.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
eor v4.16b, v4.16b, RTMP3.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v5.16b, v5.16b, RTMP0.16b;
eor v6.16b, v6.16b, RTMP1.16b;
eor v7.16b, v7.16b, RTMP2.16b;
mov RIV.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
cbz w4, .Lcbc_end;
b .Lcbc_loop_blk;
.Lcbc_tail8:
add w4, w4, #8;
cmp w4, #4;
blt .Lcbc_tail4;
sub w4, w4, #4;
ld1 {v0.16b-v3.16b}, [x2];
SM4_CRYPT_BLK4(v0, v1, v2, v3);
eor v0.16b, v0.16b, RIV.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v1.16b, v1.16b, RTMP0.16b;
eor v2.16b, v2.16b, RTMP1.16b;
eor v3.16b, v3.16b, RTMP2.16b;
mov RIV.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
cbz w4, .Lcbc_end;
.Lcbc_tail4:
sub w4, w4, #1;
ld1 {v0.16b}, [x2];
SM4_CRYPT_BLK(v0);
eor v0.16b, v0.16b, RIV.16b;
ld1 {RIV.16b}, [x2], #16;
st1 {v0.16b}, [x1], #16;
cbnz w4, .Lcbc_tail4;
.Lcbc_end:
/* store new IV */
st1 {RIV.16b}, [x3];
ret;
SYM_FUNC_END(sm4_ce_cbc_dec)
.align 3
SYM_FUNC_START(sm4_ce_cfb_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
PREPARE;
ld1 {RIV.16b}, [x3];
.Lcfb_enc_loop:
sub w4, w4, #1;
SM4_CRYPT_BLK(RIV);
ld1 {RTMP0.16b}, [x2], #16;
eor RIV.16b, RIV.16b, RTMP0.16b;
st1 {RIV.16b}, [x1], #16;
cbnz w4, .Lcfb_enc_loop;
/* store new IV */
st1 {RIV.16b}, [x3];
ret;
SYM_FUNC_END(sm4_ce_cfb_enc)
.align 3
SYM_FUNC_START(sm4_ce_cfb_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
PREPARE;
ld1 {v0.16b}, [x3];
.Lcfb_loop_blk:
sub w4, w4, #8;
tbnz w4, #31, .Lcfb_tail8;
ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
ld1 {v4.16b-v7.16b}, [x2];
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
sub x2, x2, #48;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
mov v0.16b, RTMP3.16b;
cbz w4, .Lcfb_end;
b .Lcfb_loop_blk;
.Lcfb_tail8:
add w4, w4, #8;
cmp w4, #4;
blt .Lcfb_tail4;
sub w4, w4, #4;
ld1 {v1.16b, v2.16b, v3.16b}, [x2];
SM4_CRYPT_BLK4(v0, v1, v2, v3);
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
mov v0.16b, RTMP3.16b;
cbz w4, .Lcfb_end;
.Lcfb_tail4:
sub w4, w4, #1;
SM4_CRYPT_BLK(v0);
ld1 {RTMP0.16b}, [x2], #16;
eor v0.16b, v0.16b, RTMP0.16b;
st1 {v0.16b}, [x1], #16;
mov v0.16b, RTMP0.16b;
cbnz w4, .Lcfb_tail4;
.Lcfb_end:
/* store new IV */
st1 {v0.16b}, [x3];
ret;
SYM_FUNC_END(sm4_ce_cfb_dec)
.align 3
SYM_FUNC_START(sm4_ce_ctr_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nblocks
*/
PREPARE;
ldp x7, x8, [x3];
rev x7, x7;
rev x8, x8;
.Lctr_loop_blk:
sub w4, w4, #8;
tbnz w4, #31, .Lctr_tail8;
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
adc x7, x7, xzr; \
rev64 vctr.16b, vctr.16b;
/* construct CTRs */
inc_le128(v0); /* +0 */
inc_le128(v1); /* +1 */
inc_le128(v2); /* +2 */
inc_le128(v3); /* +3 */
inc_le128(v4); /* +4 */
inc_le128(v5); /* +5 */
inc_le128(v6); /* +6 */
inc_le128(v7); /* +7 */
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
cbz w4, .Lctr_end;
b .Lctr_loop_blk;
.Lctr_tail8:
add w4, w4, #8;
cmp w4, #4;
blt .Lctr_tail4;
sub w4, w4, #4;
/* construct CTRs */
inc_le128(v0); /* +0 */
inc_le128(v1); /* +1 */
inc_le128(v2); /* +2 */
inc_le128(v3); /* +3 */
SM4_CRYPT_BLK4(v0, v1, v2, v3);
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
cbz w4, .Lctr_end;
.Lctr_tail4:
sub w4, w4, #1;
/* construct CTRs */
inc_le128(v0);
SM4_CRYPT_BLK(v0);
ld1 {RTMP0.16b}, [x2], #16;
eor v0.16b, v0.16b, RTMP0.16b;
st1 {v0.16b}, [x1], #16;
cbnz w4, .Lctr_tail4;
.Lctr_end:
/* store new CTR */
rev x7, x7;
rev x8, x8;
stp x7, x8, [x3];
ret;
SYM_FUNC_END(sm4_ce_ctr_enc)

View File

@@ -1,82 +1,372 @@
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, using ARMv8 Crypto Extensions
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/sm4.h>
#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
asmlinkage void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
const u32 *fk, const u32 *ck);
asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_crypt(rkey, dst, src, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static int sm4_ecb_encrypt(struct skcipher_request *req)
{
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
kernel_neon_end();
}
return sm4_ecb_do_crypt(req, ctx->rkey_enc);
}
static struct crypto_alg sm4_ce_alg = {
.cra_name = "sm4",
.cra_driver_name = "sm4-ce",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
.cra_u.cipher = {
.cia_min_keysize = SM4_KEY_SIZE,
.cia_max_keysize = SM4_KEY_SIZE,
.cia_setkey = sm4_ce_setkey,
.cia_encrypt = sm4_ce_encrypt,
.cia_decrypt = sm4_ce_decrypt
static int sm4_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_ecb_do_crypt(req, ctx->rkey_dec);
}
static int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cfb_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cfb_dec(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static struct skcipher_alg sm4_algs[] = {
{
.base = {
.cra_name = "ecb(sm4)",
.cra_driver_name = "ecb-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ecb_encrypt,
.decrypt = sm4_ecb_decrypt,
}, {
.base = {
.cra_name = "cbc(sm4)",
.cra_driver_name = "cbc-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = sm4_cbc_decrypt,
}, {
.base = {
.cra_name = "cfb(sm4)",
.cra_driver_name = "cfb-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = sm4_cfb_decrypt,
}, {
.base = {
.cra_name = "ctr(sm4)",
.cra_driver_name = "ctr-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ctr_crypt,
.decrypt = sm4_ctr_crypt,
}
};
static int __init sm4_ce_mod_init(void)
static int __init sm4_init(void)
{
return crypto_register_alg(&sm4_ce_alg);
return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
static void __exit sm4_ce_mod_fini(void)
static void __exit sm4_exit(void)
{
crypto_unregister_alg(&sm4_ce_alg);
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
module_cpu_feature_match(SM4, sm4_ce_mod_init);
module_exit(sm4_ce_mod_fini);
module_cpu_feature_match(SM4, sm4_init);
module_exit(sm4_exit);
MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("ecb(sm4)");
MODULE_ALIAS_CRYPTO("cbc(sm4)");
MODULE_ALIAS_CRYPTO("cfb(sm4)");
MODULE_ALIAS_CRYPTO("ctr(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");

View File

@@ -0,0 +1,487 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm for ARMv8 NEON
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/* Register macros */
#define RTMP0 v8
#define RTMP1 v9
#define RTMP2 v10
#define RTMP3 v11
#define RX0 v12
#define RX1 v13
#define RKEY v14
#define RIV v15
/* Helper macros. */
#define PREPARE \
adr_l x5, crypto_sm4_sbox; \
ld1 {v16.16b-v19.16b}, [x5], #64; \
ld1 {v20.16b-v23.16b}, [x5], #64; \
ld1 {v24.16b-v27.16b}, [x5], #64; \
ld1 {v28.16b-v31.16b}, [x5];
#define transpose_4x4(s0, s1, s2, s3) \
zip1 RTMP0.4s, s0.4s, s1.4s; \
zip1 RTMP1.4s, s2.4s, s3.4s; \
zip2 RTMP2.4s, s0.4s, s1.4s; \
zip2 RTMP3.4s, s2.4s, s3.4s; \
zip1 s0.2d, RTMP0.2d, RTMP1.2d; \
zip2 s1.2d, RTMP0.2d, RTMP1.2d; \
zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
zip2 s3.2d, RTMP2.2d, RTMP3.2d;
#define rotate_clockwise_90(s0, s1, s2, s3) \
zip1 RTMP0.4s, s1.4s, s0.4s; \
zip2 RTMP1.4s, s1.4s, s0.4s; \
zip1 RTMP2.4s, s3.4s, s2.4s; \
zip2 RTMP3.4s, s3.4s, s2.4s; \
zip1 s0.2d, RTMP2.2d, RTMP0.2d; \
zip2 s1.2d, RTMP2.2d, RTMP0.2d; \
zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
zip2 s3.2d, RTMP3.2d, RTMP1.2d;
#define ROUND4(round, s0, s1, s2, s3) \
dup RX0.4s, RKEY.s[round]; \
/* rk ^ s1 ^ s2 ^ s3 */ \
eor RTMP1.16b, s2.16b, s3.16b; \
eor RX0.16b, RX0.16b, s1.16b; \
eor RX0.16b, RX0.16b, RTMP1.16b; \
\
/* sbox, non-linear part */ \
movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
\
/* linear part */ \
shl RTMP1.4s, RTMP0.4s, #8; \
shl RTMP2.4s, RTMP0.4s, #16; \
shl RTMP3.4s, RTMP0.4s, #24; \
sri RTMP1.4s, RTMP0.4s, #(32-8); \
sri RTMP2.4s, RTMP0.4s, #(32-16); \
sri RTMP3.4s, RTMP0.4s, #(32-24); \
/* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
eor RTMP1.16b, RTMP1.16b, RTMP0.16b; \
eor RTMP1.16b, RTMP1.16b, RTMP2.16b; \
/* RTMP3 = x ^ rol32(x, 24) ^ rol32(RTMP1, 2) */ \
eor RTMP3.16b, RTMP3.16b, RTMP0.16b; \
shl RTMP2.4s, RTMP1.4s, 2; \
sri RTMP2.4s, RTMP1.4s, #(32-2); \
eor RTMP3.16b, RTMP3.16b, RTMP2.16b; \
/* s0 ^= RTMP3 */ \
eor s0.16b, s0.16b, RTMP3.16b;
#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
\
transpose_4x4(b0, b1, b2, b3); \
\
mov x6, 8; \
4: \
ld1 {RKEY.4s}, [x0], #16; \
subs x6, x6, #1; \
\
ROUND4(0, b0, b1, b2, b3); \
ROUND4(1, b1, b2, b3, b0); \
ROUND4(2, b2, b3, b0, b1); \
ROUND4(3, b3, b0, b1, b2); \
\
bne 4b; \
\
rotate_clockwise_90(b0, b1, b2, b3); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
\
/* repoint to rkey */ \
sub x0, x0, #128;
#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \
/* rk ^ s1 ^ s2 ^ s3 */ \
dup RX0.4s, RKEY.s[round]; \
eor RTMP0.16b, s2.16b, s3.16b; \
mov RX1.16b, RX0.16b; \
eor RTMP1.16b, t2.16b, t3.16b; \
eor RX0.16b, RX0.16b, s1.16b; \
eor RX1.16b, RX1.16b, t1.16b; \
eor RX0.16b, RX0.16b, RTMP0.16b; \
eor RX1.16b, RX1.16b, RTMP1.16b; \
\
/* sbox, non-linear part */ \
movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
sub RX1.16b, RX1.16b, RTMP3.16b; \
tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
sub RX1.16b, RX1.16b, RTMP3.16b; \
tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \
sub RX0.16b, RX0.16b, RTMP3.16b; \
sub RX1.16b, RX1.16b, RTMP3.16b; \
tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \
\
/* linear part */ \
shl RX0.4s, RTMP0.4s, #8; \
shl RX1.4s, RTMP1.4s, #8; \
shl RTMP2.4s, RTMP0.4s, #16; \
shl RTMP3.4s, RTMP1.4s, #16; \
sri RX0.4s, RTMP0.4s, #(32 - 8); \
sri RX1.4s, RTMP1.4s, #(32 - 8); \
sri RTMP2.4s, RTMP0.4s, #(32 - 16); \
sri RTMP3.4s, RTMP1.4s, #(32 - 16); \
/* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
eor RX0.16b, RX0.16b, RTMP0.16b; \
eor RX1.16b, RX1.16b, RTMP1.16b; \
eor RX0.16b, RX0.16b, RTMP2.16b; \
eor RX1.16b, RX1.16b, RTMP3.16b; \
/* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */ \
shl RTMP2.4s, RTMP0.4s, #24; \
shl RTMP3.4s, RTMP1.4s, #24; \
sri RTMP2.4s, RTMP0.4s, #(32 - 24); \
sri RTMP3.4s, RTMP1.4s, #(32 - 24); \
eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \
eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
shl RTMP2.4s, RX0.4s, #2; \
shl RTMP3.4s, RX1.4s, #2; \
sri RTMP2.4s, RX0.4s, #(32 - 2); \
sri RTMP3.4s, RX1.4s, #(32 - 2); \
eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \
eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
/* s0/t0 ^= RTMP0/1 */ \
eor s0.16b, s0.16b, RTMP0.16b; \
eor t0.16b, t0.16b, RTMP1.16b;
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
\
transpose_4x4(b0, b1, b2, b3); \
transpose_4x4(b4, b5, b6, b7); \
\
mov x6, 8; \
8: \
ld1 {RKEY.4s}, [x0], #16; \
subs x6, x6, #1; \
\
ROUND8(0, b0, b1, b2, b3, b4, b5, b6, b7); \
ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4); \
ROUND8(2, b2, b3, b0, b1, b6, b7, b4, b5); \
ROUND8(3, b3, b0, b1, b2, b7, b4, b5, b6); \
\
bne 8b; \
\
rotate_clockwise_90(b0, b1, b2, b3); \
rotate_clockwise_90(b4, b5, b6, b7); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
\
/* repoint to rkey */ \
sub x0, x0, #128;
.align 3
SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: num blocks (1..4)
*/
PREPARE;
ld1 {v0.16b}, [x2], #16;
mov v1.16b, v0.16b;
mov v2.16b, v0.16b;
mov v3.16b, v0.16b;
cmp w3, #2;
blt .Lblk4_load_input_done;
ld1 {v1.16b}, [x2], #16;
beq .Lblk4_load_input_done;
ld1 {v2.16b}, [x2], #16;
cmp w3, #3;
beq .Lblk4_load_input_done;
ld1 {v3.16b}, [x2];
.Lblk4_load_input_done:
SM4_CRYPT_BLK4(v0, v1, v2, v3);
st1 {v0.16b}, [x1], #16;
cmp w3, #2;
blt .Lblk4_store_output_done;
st1 {v1.16b}, [x1], #16;
beq .Lblk4_store_output_done;
st1 {v2.16b}, [x1], #16;
cmp w3, #3;
beq .Lblk4_store_output_done;
st1 {v3.16b}, [x1];
.Lblk4_store_output_done:
ret;
SYM_FUNC_END(__sm4_neon_crypt_blk1_4)
.align 3
SYM_FUNC_START(sm4_neon_crypt_blk1_8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: num blocks (1..8)
*/
cmp w3, #5;
blt __sm4_neon_crypt_blk1_4;
PREPARE;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b}, [x2], #16;
mov v5.16b, v4.16b;
mov v6.16b, v4.16b;
mov v7.16b, v4.16b;
beq .Lblk8_load_input_done;
ld1 {v5.16b}, [x2], #16;
cmp w3, #7;
blt .Lblk8_load_input_done;
ld1 {v6.16b}, [x2], #16;
beq .Lblk8_load_input_done;
ld1 {v7.16b}, [x2];
.Lblk8_load_input_done:
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
cmp w3, #6;
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b}, [x1], #16;
blt .Lblk8_store_output_done;
st1 {v5.16b}, [x1], #16;
beq .Lblk8_store_output_done;
st1 {v6.16b}, [x1], #16;
cmp w3, #7;
beq .Lblk8_store_output_done;
st1 {v7.16b}, [x1];
.Lblk8_store_output_done:
ret;
SYM_FUNC_END(sm4_neon_crypt_blk1_8)
.align 3
SYM_FUNC_START(sm4_neon_crypt_blk8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: nblocks (multiples of 8)
*/
PREPARE;
.Lcrypt_loop_blk:
subs w3, w3, #8;
bmi .Lcrypt_end;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2], #64;
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1], #64;
b .Lcrypt_loop_blk;
.Lcrypt_end:
ret;
SYM_FUNC_END(sm4_neon_crypt_blk8)
.align 3
SYM_FUNC_START(sm4_neon_cbc_dec_blk8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks (multiples of 8)
*/
PREPARE;
ld1 {RIV.16b}, [x3];
.Lcbc_loop_blk:
subs w4, w4, #8;
bmi .Lcbc_end;
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2];
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
sub x2, x2, #64;
eor v0.16b, v0.16b, RIV.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v1.16b, v1.16b, RTMP0.16b;
eor v2.16b, v2.16b, RTMP1.16b;
eor v3.16b, v3.16b, RTMP2.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
eor v4.16b, v4.16b, RTMP3.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v5.16b, v5.16b, RTMP0.16b;
eor v6.16b, v6.16b, RTMP1.16b;
eor v7.16b, v7.16b, RTMP2.16b;
mov RIV.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
b .Lcbc_loop_blk;
.Lcbc_end:
/* store new IV */
st1 {RIV.16b}, [x3];
ret;
SYM_FUNC_END(sm4_neon_cbc_dec_blk8)
.align 3
SYM_FUNC_START(sm4_neon_cfb_dec_blk8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks (multiples of 8)
*/
PREPARE;
ld1 {v0.16b}, [x3];
.Lcfb_loop_blk:
subs w4, w4, #8;
bmi .Lcfb_end;
ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
ld1 {v4.16b-v7.16b}, [x2];
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
sub x2, x2, #48;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
mov v0.16b, RTMP3.16b;
b .Lcfb_loop_blk;
.Lcfb_end:
/* store new IV */
st1 {v0.16b}, [x3];
ret;
SYM_FUNC_END(sm4_neon_cfb_dec_blk8)
.align 3
SYM_FUNC_START(sm4_neon_ctr_enc_blk8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nblocks (multiples of 8)
*/
PREPARE;
ldp x7, x8, [x3];
rev x7, x7;
rev x8, x8;
.Lctr_loop_blk:
subs w4, w4, #8;
bmi .Lctr_end;
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
adc x7, x7, xzr; \
rev64 vctr.16b, vctr.16b;
/* construct CTRs */
inc_le128(v0); /* +0 */
inc_le128(v1); /* +1 */
inc_le128(v2); /* +2 */
inc_le128(v3); /* +3 */
inc_le128(v4); /* +4 */
inc_le128(v5); /* +5 */
inc_le128(v6); /* +6 */
inc_le128(v7); /* +7 */
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
b .Lctr_loop_blk;
.Lctr_end:
/* store new CTR */
rev x7, x7;
rev x8, x8;
stp x7, x8, [x3];
ret;
SYM_FUNC_END(sm4_neon_ctr_enc_blk8)

View File

@@ -0,0 +1,442 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, using ARMv8 NEON
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1))
asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_expandkey(ctx, key, key_len);
}
static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_crypt_blk8(rkey, dst, src, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_neon_crypt_blk1_8(rkey, dst, src, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_ecb_do_crypt(req, ctx->rkey_enc);
}
static int sm4_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_ecb_do_crypt(req, ctx->rkey_dec);
}
static int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
sm4_crypt_block(ctx->rkey_enc, dst, dst);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
u8 iv[SM4_BLOCK_SIZE];
int i;
sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream,
src, nblks);
src += ((int)nblks - 2) * SM4_BLOCK_SIZE;
dst += (nblks - 1) * SM4_BLOCK_SIZE;
memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
for (i = nblks - 1; i > 0; i--) {
crypto_xor_cpy(dst, src,
&keystream[i * SM4_BLOCK_SIZE],
SM4_BLOCK_SIZE);
src -= SM4_BLOCK_SIZE;
dst -= SM4_BLOCK_SIZE;
}
crypto_xor_cpy(dst, walk.iv,
keystream, SM4_BLOCK_SIZE);
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
u8 keystream[SM4_BLOCK_SIZE];
const u8 *iv = walk.iv;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
while (nbytes >= SM4_BLOCK_SIZE) {
sm4_crypt_block(ctx->rkey_enc, keystream, iv);
crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
iv = dst;
src += SM4_BLOCK_SIZE;
dst += SM4_BLOCK_SIZE;
nbytes -= SM4_BLOCK_SIZE;
}
if (iv != walk.iv)
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
if (nblks > 1)
memcpy(&keystream[SM4_BLOCK_SIZE], src,
(nblks - 1) * SM4_BLOCK_SIZE);
memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE,
SM4_BLOCK_SIZE);
sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
keystream, nblks);
crypto_xor_cpy(dst, src, keystream,
nblks * SM4_BLOCK_SIZE);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
int i;
for (i = 0; i < nblks; i++) {
memcpy(&keystream[i * SM4_BLOCK_SIZE],
walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
}
sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
keystream, nblks);
crypto_xor_cpy(dst, src, keystream,
nblks * SM4_BLOCK_SIZE);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static struct skcipher_alg sm4_algs[] = {
{
.base = {
.cra_name = "ecb(sm4)",
.cra_driver_name = "ecb-sm4-neon",
.cra_priority = 200,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ecb_encrypt,
.decrypt = sm4_ecb_decrypt,
}, {
.base = {
.cra_name = "cbc(sm4)",
.cra_driver_name = "cbc-sm4-neon",
.cra_priority = 200,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = sm4_cbc_decrypt,
}, {
.base = {
.cra_name = "cfb(sm4)",
.cra_driver_name = "cfb-sm4-neon",
.cra_priority = 200,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = sm4_cfb_decrypt,
}, {
.base = {
.cra_name = "ctr(sm4)",
.cra_driver_name = "ctr-sm4-neon",
.cra_priority = 200,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ctr_crypt,
.decrypt = sm4_ctr_crypt,
}
};
static int __init sm4_init(void)
{
return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
static void __exit sm4_exit(void)
{
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
module_init(sm4_init);
module_exit(sm4_exit);
MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 NEON");
MODULE_ALIAS_CRYPTO("sm4-neon");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("ecb(sm4)");
MODULE_ALIAS_CRYPTO("cbc(sm4)");
MODULE_ALIAS_CRYPTO("cfb(sm4)");
MODULE_ALIAS_CRYPTO("ctr(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");


@ -62,6 +62,34 @@ static int chacha20_s390(struct skcipher_request *req)
return rc;
}
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
/* TODO: implement hchacha_block_arch() in assembly */
hchacha_block_generic(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
/* The s390 ChaCha20 implementation has 20 rounds hard-coded;
* it cannot handle a single block of data or less, but otherwise
* it can handle data of arbitrary size.
*/
if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
chacha_crypt_generic(state, dst, src, bytes, nrounds);
else
chacha20_crypt_s390(state, dst, src, bytes,
&state[4], &state[12]);
}
EXPORT_SYMBOL(chacha_crypt_arch);
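/*
* Illustrative usage sketch (not taken from this file): callers normally go
* through the generic ChaCha library wrappers in <crypto/chacha.h>, which
* dispatch to the *_arch() hooks above when
* CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA is enabled:
*
*	u32 state[CHACHA_STATE_WORDS];
*
*	chacha_init(state, key, iv);
*	chacha_crypt(state, dst, src, len, 20);
*/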
static struct skcipher_alg chacha_algs[] = {
{
.base.cra_name = "chacha20",
@ -83,12 +111,14 @@ static struct skcipher_alg chacha_algs[] = {
static int __init chacha_mod_init(void)
{
return crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
}
static void __exit chacha_mod_fini(void)
{
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
}
module_cpu_feature_match(VXRS, chacha_mod_init);


@ -303,7 +303,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
static int __init blowfish_init(void)
{
int err;
@ -327,15 +327,15 @@ static int __init init(void)
return err;
}
static void __exit fini(void)
static void __exit blowfish_fini(void)
{
crypto_unregister_alg(&bf_cipher_alg);
crypto_unregister_skciphers(bf_skcipher_algs,
ARRAY_SIZE(bf_skcipher_algs));
}
module_init(init);
module_exit(fini);
module_init(blowfish_init);
module_exit(blowfish_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");


@ -1377,7 +1377,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
static int __init camellia_init(void)
{
int err;
@ -1401,15 +1401,15 @@ static int __init init(void)
return err;
}
static void __exit fini(void)
static void __exit camellia_fini(void)
{
crypto_unregister_alg(&camellia_cipher_alg);
crypto_unregister_skciphers(camellia_skcipher_algs,
ARRAY_SIZE(camellia_skcipher_algs));
}
module_init(init);
module_exit(fini);
module_init(camellia_init);
module_exit(camellia_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");


@ -96,7 +96,7 @@ static struct skcipher_alg serpent_algs[] = {
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
static int __init init(void)
static int __init serpent_avx2_init(void)
{
const char *feature_name;
@ -115,14 +115,14 @@ static int __init init(void)
serpent_simd_algs);
}
static void __exit fini(void)
static void __exit serpent_avx2_fini(void)
{
simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
serpent_simd_algs);
}
module_init(init);
module_exit(fini);
module_init(serpent_avx2_init);
module_exit(serpent_avx2_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");


@ -81,18 +81,18 @@ static struct crypto_alg alg = {
}
};
static int __init init(void)
static int __init twofish_glue_init(void)
{
return crypto_register_alg(&alg);
}
static void __exit fini(void)
static void __exit twofish_glue_fini(void)
{
crypto_unregister_alg(&alg);
}
module_init(init);
module_exit(fini);
module_init(twofish_glue_init);
module_exit(twofish_glue_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");


@ -140,7 +140,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
static int __init twofish_3way_init(void)
{
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
@ -154,13 +154,13 @@ static int __init init(void)
ARRAY_SIZE(tf_skciphers));
}
static void __exit fini(void)
static void __exit twofish_3way_fini(void)
{
crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
}
module_init(init);
module_exit(fini);
module_init(twofish_3way_init);
module_exit(twofish_3way_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");


@ -274,7 +274,7 @@ config CRYPTO_ECRDSA
config CRYPTO_SM2
tristate "SM2 algorithm"
select CRYPTO_LIB_SM3
select CRYPTO_SM3
select CRYPTO_AKCIPHER
select CRYPTO_MANAGER
select MPILIB
@ -1010,9 +1010,12 @@ config CRYPTO_SHA3
http://keccak.noekeon.org/
config CRYPTO_SM3
tristate
config CRYPTO_SM3_GENERIC
tristate "SM3 digest algorithm"
select CRYPTO_HASH
select CRYPTO_LIB_SM3
select CRYPTO_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
It is part of the Chinese Commercial Cryptography suite.
@ -1025,7 +1028,7 @@ config CRYPTO_SM3_AVX_X86_64
tristate "SM3 digest algorithm (x86_64/AVX)"
depends on X86 && 64BIT
select CRYPTO_HASH
select CRYPTO_LIB_SM3
select CRYPTO_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
It is part of the Chinese Commercial Cryptography suite. This is
@ -1572,9 +1575,12 @@ config CRYPTO_SERPENT_AVX2_X86_64
<https://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_SM4
tristate
config CRYPTO_SM4_GENERIC
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@ -1603,7 +1609,7 @@ config CRYPTO_SM4_AESNI_AVX_X86_64
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
@ -1624,7 +1630,7 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
select CRYPTO_LIB_SM4
select CRYPTO_SM4
select CRYPTO_SM4_AESNI_AVX_X86_64
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
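# Summary of the SM3/SM4 Kconfig rework above (wording not from the Kconfig
# files themselves): CRYPTO_SM3 and CRYPTO_SM4 become hidden tristates that
# build the shared sm3.o/sm4.o library code, the new CRYPTO_SM3_GENERIC and
# CRYPTO_SM4_GENERIC options carry the user-visible generic implementations,
# and accelerated drivers now select CRYPTO_SM3/CRYPTO_SM4 instead of the
# old CRYPTO_LIB_SM3/CRYPTO_LIB_SM4 symbols.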


@ -78,7 +78,8 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o
obj-$(CONFIG_CRYPTO_SM3) += sm3.o
obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o
obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
@ -134,7 +135,8 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
obj-$(CONFIG_CRYPTO_SM4) += sm4.o
obj-$(CONFIG_CRYPTO_SM4_GENERIC) += sm4_generic.o
obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o


@ -39,6 +39,10 @@ struct cryptd_cpu_queue {
};
struct cryptd_queue {
/*
* Protected by disabling BH to allow enqueueing from softirq and
* dequeuing from the kworker (cryptd_queue_worker()).
*/
struct cryptd_cpu_queue __percpu *cpu_queue;
};
@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue)
static int cryptd_enqueue_request(struct cryptd_queue *queue,
struct crypto_async_request *request)
{
int cpu, err;
int err;
struct cryptd_cpu_queue *cpu_queue;
refcount_t *refcnt;
cpu = get_cpu();
local_bh_disable();
cpu_queue = this_cpu_ptr(queue->cpu_queue);
err = crypto_enqueue_request(&cpu_queue->queue, request);
refcnt = crypto_tfm_ctx(request->tfm);
if (err == -ENOSPC)
goto out_put_cpu;
goto out;
queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work);
if (!refcount_read(refcnt))
goto out_put_cpu;
goto out;
refcount_inc(refcnt);
out_put_cpu:
put_cpu();
out:
local_bh_enable();
return err;
}
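/*
* The local_bh_disable()/local_bh_enable() pair above replaces the old
* get_cpu()/put_cpu(): it keeps the task on the current CPU for the
* this_cpu_ptr() and queue_work_on(smp_processor_id(), ...) calls while also
* serialising against softirq-context callers of cryptd_enqueue_request(),
* which plain preemption disabling did not.
*/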
@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work)
cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
/*
* Only handle one request at a time to avoid hogging crypto workqueue.
* preempt_disable/enable is used to prevent being preempted by
* cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
* cryptd_enqueue_request() being accessed from software interrupts.
*/
local_bh_disable();
preempt_disable();
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
preempt_enable();
local_bh_enable();
if (!req)


@ -253,6 +253,7 @@ static void crypto_pump_work(struct kthread_work *work)
* crypto_transfer_request - transfer the new request into the engine queue
* @engine: the hardware engine
* @req: the request need to be listed into the engine queue
* @need_pump: indicates whether to queue the pump of the request to kthread_work
*/
static int crypto_transfer_request(struct crypto_engine *engine,
struct crypto_async_request *req,


@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req)
/* Step 1: verify that 0 < r < q, 0 < s < q */
if (vli_is_zero(r, ndigits) ||
vli_cmp(r, ctx->curve->n, ndigits) == 1 ||
vli_cmp(r, ctx->curve->n, ndigits) >= 0 ||
vli_is_zero(s, ndigits) ||
vli_cmp(s, ctx->curve->n, ndigits) == 1)
vli_cmp(s, ctx->curve->n, ndigits) >= 0)
return -EKEYREJECTED;
/* Step 2: calculate hash (h) of the message (passed as input) */
/* Step 3: calculate e = h \mod q */
vli_from_le64(e, digest, ndigits);
if (vli_cmp(e, ctx->curve->n, ndigits) == 1)
if (vli_cmp(e, ctx->curve->n, ndigits) >= 0)
vli_sub(e, e, ctx->curve->n, ndigits);
if (vli_is_zero(e, ndigits))
e[0] = 1;
@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req)
/* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */
ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key,
ctx->curve);
if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1)
if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0)
vli_sub(cc.x, cc.x, ctx->curve->n, ndigits);
/* Step 7: if R == r signature is valid */


@ -11,7 +11,7 @@
#include <asm/unaligned.h>
#include <crypto/sm4.h>
static const u32 fk[4] = {
static const u32 ____cacheline_aligned fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
@ -61,6 +61,14 @@ static const u8 ____cacheline_aligned sbox[256] = {
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
extern const u32 crypto_sm4_fk[4] __alias(fk);
extern const u32 crypto_sm4_ck[32] __alias(ck);
extern const u8 crypto_sm4_sbox[256] __alias(sbox);
EXPORT_SYMBOL(crypto_sm4_fk);
EXPORT_SYMBOL(crypto_sm4_ck);
EXPORT_SYMBOL(crypto_sm4_sbox);
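/*
* The __alias() declarations above export the existing constant tables under
* the crypto_sm4_* names so that architecture implementations (such as the
* arm64 SM4 code added in this series) can share the S-box and key-schedule
* constants instead of duplicating them.
*/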
static inline u32 sm4_t_non_lin_sub(u32 x)
{
u32 out;


@ -232,6 +232,20 @@ enum finalization_type {
FINALIZATION_TYPE_DIGEST, /* use digest() */
};
/*
* Whether the crypto operation will occur in-place, and if so whether the
* source and destination scatterlist pointers will coincide (req->src ==
* req->dst), or whether they'll merely point to two separate scatterlists
* (req->src != req->dst) that reference the same underlying memory.
*
* This is only relevant for algorithm types that support in-place operation.
*/
enum inplace_mode {
OUT_OF_PLACE,
INPLACE_ONE_SGLIST,
INPLACE_TWO_SGLISTS,
};
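/*
* For reference, the INPLACE_TWO_SGLISTS case matches callers that do
* roughly the following (buf, len and iv are placeholders):
*
*	sg_init_one(&src_sg, buf, len);
*	sg_init_one(&dst_sg, buf, len);	 (separate sglist, same memory)
*	skcipher_request_set_crypt(req, &src_sg, &dst_sg, len, iv);
*
* so req->src != req->dst even though the operation is in-place; dm-crypt
* is a known user of this pattern.
*/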
#define TEST_SG_TOTAL 10000
/**
@ -265,7 +279,7 @@ struct test_sg_division {
* crypto test vector can be tested.
*
* @name: name of this config, logged for debugging purposes if a test fails
* @inplace: operate on the data in-place, if applicable for the algorithm type?
* @inplace_mode: whether and how to operate on the data in-place, if applicable
* @req_flags: extra request_flags, e.g. CRYPTO_TFM_REQ_MAY_SLEEP
* @src_divs: description of how to arrange the source scatterlist
* @dst_divs: description of how to arrange the dst scatterlist, if applicable
@ -282,7 +296,7 @@ struct test_sg_division {
*/
struct testvec_config {
const char *name;
bool inplace;
enum inplace_mode inplace_mode;
u32 req_flags;
struct test_sg_division src_divs[XBUFSIZE];
struct test_sg_division dst_divs[XBUFSIZE];
@ -307,11 +321,16 @@ struct testvec_config {
/* Configs for skciphers and aeads */
static const struct testvec_config default_cipher_testvec_configs[] = {
{
.name = "in-place",
.inplace = true,
.name = "in-place (one sglist)",
.inplace_mode = INPLACE_ONE_SGLIST,
.src_divs = { { .proportion_of_total = 10000 } },
}, {
.name = "in-place (two sglists)",
.inplace_mode = INPLACE_TWO_SGLISTS,
.src_divs = { { .proportion_of_total = 10000 } },
}, {
.name = "out-of-place",
.inplace_mode = OUT_OF_PLACE,
.src_divs = { { .proportion_of_total = 10000 } },
}, {
.name = "unaligned buffer, offset=1",
@ -349,7 +368,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
.key_offset = 3,
}, {
.name = "misaligned splits crossing pages, inplace",
.inplace = true,
.inplace_mode = INPLACE_ONE_SGLIST,
.src_divs = {
{
.proportion_of_total = 7500,
@ -749,18 +768,39 @@ static int build_cipher_test_sglists(struct cipher_test_sglists *tsgls,
iov_iter_kvec(&input, WRITE, inputs, nr_inputs, src_total_len);
err = build_test_sglist(&tsgls->src, cfg->src_divs, alignmask,
cfg->inplace ?
cfg->inplace_mode != OUT_OF_PLACE ?
max(dst_total_len, src_total_len) :
src_total_len,
&input, NULL);
if (err)
return err;
if (cfg->inplace) {
/*
* In-place crypto operations can use the same scatterlist for both the
* source and destination (req->src == req->dst), or can use separate
* scatterlists (req->src != req->dst) which point to the same
* underlying memory. Make sure to test both cases.
*/
if (cfg->inplace_mode == INPLACE_ONE_SGLIST) {
tsgls->dst.sgl_ptr = tsgls->src.sgl;
tsgls->dst.nents = tsgls->src.nents;
return 0;
}
if (cfg->inplace_mode == INPLACE_TWO_SGLISTS) {
/*
* For now we keep it simple and only test the case where the
* two scatterlists have identical entries, rather than
* different entries that split up the same memory differently.
*/
memcpy(tsgls->dst.sgl, tsgls->src.sgl,
tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
memcpy(tsgls->dst.sgl_saved, tsgls->src.sgl,
tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
tsgls->dst.sgl_ptr = tsgls->dst.sgl;
tsgls->dst.nents = tsgls->src.nents;
return 0;
}
/* Out of place */
return build_test_sglist(&tsgls->dst,
cfg->dst_divs[0].proportion_of_total ?
cfg->dst_divs : cfg->src_divs,
@ -995,9 +1035,19 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
p += scnprintf(p, end - p, "random:");
if (prandom_u32() % 2 == 0) {
cfg->inplace = true;
p += scnprintf(p, end - p, " inplace");
switch (prandom_u32() % 4) {
case 0:
case 1:
cfg->inplace_mode = OUT_OF_PLACE;
break;
case 2:
cfg->inplace_mode = INPLACE_ONE_SGLIST;
p += scnprintf(p, end - p, " inplace_one_sglist");
break;
default:
cfg->inplace_mode = INPLACE_TWO_SGLISTS;
p += scnprintf(p, end - p, " inplace_two_sglists");
break;
}
if (prandom_u32() % 2 == 0) {
@ -1034,7 +1084,7 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
cfg->req_flags);
p += scnprintf(p, end - p, "]");
if (!cfg->inplace && prandom_u32() % 2 == 0) {
if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32() % 2 == 0) {
p += scnprintf(p, end - p, " dst_divs=[");
p = generate_random_sgl_divisions(cfg->dst_divs,
ARRAY_SIZE(cfg->dst_divs),
@ -2085,7 +2135,8 @@ static int test_aead_vec_cfg(int enc, const struct aead_testvec *vec,
/* Check for the correct output (ciphertext or plaintext) */
err = verify_correct_output(&tsgls->dst, enc ? vec->ctext : vec->ptext,
enc ? vec->clen : vec->plen,
vec->alen, enc || !cfg->inplace);
vec->alen,
enc || cfg->inplace_mode == OUT_OF_PLACE);
if (err == -EOVERFLOW) {
pr_err("alg: aead: %s %s overran dst buffer on test vector %s, cfg=\"%s\"\n",
driver, op, vec_name, cfg->name);


@ -385,6 +385,19 @@ config HW_RANDOM_PIC32
If unsure, say Y.
config HW_RANDOM_POLARFIRE_SOC
tristate "Microchip PolarFire SoC Random Number Generator support"
depends on HW_RANDOM && POLARFIRE_SOC_SYS_CTRL
help
This driver provides kernel-side support for the Random Number
Generator hardware found on PolarFire SoC (MPFS).
To compile this driver as a module, choose M here. The
module will be called mpfs_rng.
If unsure, say N.
config HW_RANDOM_MESON
tristate "Amlogic Meson Random Number Generator support"
depends on HW_RANDOM
@ -527,7 +540,7 @@ config HW_RANDOM_ARM_SMCCC_TRNG
config HW_RANDOM_CN10K
tristate "Marvell CN10K Random Number Generator support"
depends on HW_RANDOM && PCI && ARM64
depends on HW_RANDOM && PCI && (ARM64 || (64BIT && COMPILE_TEST))
default HW_RANDOM
help
This driver provides support for the True Random Number


@ -46,3 +46,4 @@ obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o
obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o


@ -31,26 +31,23 @@ struct cn10k_rng {
#define PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE 0xc2000b0f
static int reset_rng_health_state(struct cn10k_rng *rng)
static unsigned long reset_rng_health_state(struct cn10k_rng *rng)
{
struct arm_smccc_res res;
/* Send SMC service call to reset EBG health state */
arm_smccc_smc(PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE, 0, 0, 0, 0, 0, 0, 0, &res);
if (res.a0 != 0UL)
return -EIO;
return 0;
return res.a0;
}
static int check_rng_health(struct cn10k_rng *rng)
{
u64 status;
int err;
unsigned long err;
/* Skip checking health */
if (!rng->reg_base)
return 0;
return -ENODEV;
status = readq(rng->reg_base + RNM_PF_EBG_HEALTH);
if (status & BIT_ULL(20)) {
@ -58,7 +55,9 @@ static int check_rng_health(struct cn10k_rng *rng)
if (err) {
dev_err(&rng->pdev->dev, "HWRNG: Health test failed (status=%llx)\n",
status);
dev_err(&rng->pdev->dev, "HWRNG: error during reset\n");
dev_err(&rng->pdev->dev, "HWRNG: error during reset (error=%lx)\n",
err);
return -EIO;
}
}
return 0;
@ -90,6 +89,7 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data,
{
struct cn10k_rng *rng = (struct cn10k_rng *)hwrng->priv;
unsigned int size;
u8 *pos = data;
int err = 0;
u64 value;
@ -102,17 +102,20 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data,
while (size >= 8) {
cn10k_read_trng(rng, &value);
*((u64 *)data) = (u64)value;
*((u64 *)pos) = value;
size -= 8;
data += 8;
pos += 8;
}
while (size > 0) {
if (size > 0) {
cn10k_read_trng(rng, &value);
*((u8 *)data) = (u8)value;
size--;
data++;
while (size > 0) {
*pos = (u8)value;
value >>= 8;
size--;
pos++;
}
}
return max - size;
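/*
* The reworked tail handling above draws a single 64-bit word from the TRNG
* and hands out its bytes one at a time (shifting right by 8 each step),
* instead of issuing one cn10k_read_trng() call per remaining byte and
* keeping only the lowest byte of each result.
*/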


@ -0,0 +1,104 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Microchip PolarFire SoC (MPFS) hardware random driver
*
* Copyright (c) 2020-2022 Microchip Corporation. All rights reserved.
*
* Author: Conor Dooley <conor.dooley@microchip.com>
*/
#include <linux/module.h>
#include <linux/hw_random.h>
#include <linux/platform_device.h>
#include <soc/microchip/mpfs.h>
#define CMD_OPCODE 0x21
#define CMD_DATA_SIZE 0U
#define CMD_DATA NULL
#define MBOX_OFFSET 0U
#define RESP_OFFSET 0U
#define RNG_RESP_BYTES 32U
struct mpfs_rng {
struct mpfs_sys_controller *sys_controller;
struct hwrng rng;
};
static int mpfs_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
struct mpfs_rng *rng_priv = container_of(rng, struct mpfs_rng, rng);
u32 response_msg[RNG_RESP_BYTES / sizeof(u32)];
unsigned int count = 0, copy_size_bytes;
int ret;
struct mpfs_mss_response response = {
.resp_status = 0U,
.resp_msg = (u32 *)response_msg,
.resp_size = RNG_RESP_BYTES
};
struct mpfs_mss_msg msg = {
.cmd_opcode = CMD_OPCODE,
.cmd_data_size = CMD_DATA_SIZE,
.response = &response,
.cmd_data = CMD_DATA,
.mbox_offset = MBOX_OFFSET,
.resp_offset = RESP_OFFSET
};
while (count < max) {
ret = mpfs_blocking_transaction(rng_priv->sys_controller, &msg);
if (ret)
return ret;
copy_size_bytes = max - count > RNG_RESP_BYTES ? RNG_RESP_BYTES : max - count;
memcpy(buf + count, response_msg, copy_size_bytes);
count += copy_size_bytes;
if (!wait)
break;
}
return count;
}
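/*
* Each mpfs_blocking_transaction() round trip returns RNG_RESP_BYTES (32)
* bytes from the system controller mailbox; when the caller allows waiting,
* the loop keeps issuing transactions until max bytes have been copied,
* otherwise it returns after the first chunk.
*/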
static int mpfs_rng_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct mpfs_rng *rng_priv;
int ret;
rng_priv = devm_kzalloc(dev, sizeof(*rng_priv), GFP_KERNEL);
if (!rng_priv)
return -ENOMEM;
rng_priv->sys_controller = mpfs_sys_controller_get(&pdev->dev);
if (IS_ERR(rng_priv->sys_controller))
return dev_err_probe(dev, PTR_ERR(rng_priv->sys_controller),
"Failed to register system controller hwrng sub device\n");
rng_priv->rng.read = mpfs_rng_read;
rng_priv->rng.name = pdev->name;
rng_priv->rng.quality = 1024;
platform_set_drvdata(pdev, rng_priv);
ret = devm_hwrng_register(&pdev->dev, &rng_priv->rng);
if (ret)
return dev_err_probe(&pdev->dev, ret, "Failed to register MPFS hwrng\n");
dev_info(&pdev->dev, "Registered MPFS hwrng\n");
return 0;
}
static struct platform_driver mpfs_rng_driver = {
.driver = {
.name = "mpfs-rng",
},
.probe = mpfs_rng_probe,
};
module_platform_driver(mpfs_rng_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
MODULE_DESCRIPTION("PolarFire SoC (MPFS) hardware random driver");

View File

@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev)
r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
if (r != 0) {
clk_disable(ddata->clk);
clk_disable_unprepare(ddata->clk);
dev_err(dev, "HW init failed: %d\n", r);
return -EIO;


@ -115,7 +115,7 @@ static size_t get_optee_rng_data(struct optee_rng_private *pvt_data,
static int optee_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
struct optee_rng_private *pvt_data = to_optee_rng_private(rng);
size_t read = 0, rng_size = 0;
size_t read = 0, rng_size;
int timeout = 1;
u8 *data = buf;


@ -216,9 +216,9 @@ config CRYPTO_AES_S390
config CRYPTO_CHACHA_S390
tristate "ChaCha20 stream cipher"
depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_CHACHA20
select CRYPTO_LIB_CHACHA_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CHACHA
help
This is the s390 SIMD implementation of the ChaCha20 stream
cipher (RFC 7539).


@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ALLWINNER) += allwinner/
obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
# __init ordering requires atmel-i2c being before atmel-ecc and atmel-sha204a.
obj-$(CONFIG_CRYPTO_DEV_ATMEL_I2C) += atmel-i2c.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA204A) += atmel-sha204a.o


@ -20,7 +20,6 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
unsigned int ivsize = crypto_skcipher_ivsize(tfm);
struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
u32 mode = ctx->mode;
void *backup_iv = NULL;
/* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
u32 rx_cnt = SS_RX_DEFAULT;
u32 tx_cnt = 0;
@ -48,10 +47,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
}
if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) {
backup_iv = kzalloc(ivsize, GFP_KERNEL);
if (!backup_iv)
return -ENOMEM;
scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0);
scatterwalk_map_and_copy(ctx->backup_iv, areq->src,
areq->cryptlen - ivsize, ivsize, 0);
}
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) {
@ -134,8 +131,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
if (areq->iv) {
if (mode & SS_DECRYPTION) {
memcpy(areq->iv, backup_iv, ivsize);
kfree_sensitive(backup_iv);
memcpy(areq->iv, ctx->backup_iv, ivsize);
memzero_explicit(ctx->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize,
ivsize, 0);
@ -199,7 +196,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
unsigned int ileft = areq->cryptlen;
unsigned int oleft = areq->cryptlen;
unsigned int todo;
void *backup_iv = NULL;
struct sg_mapping_iter mi, mo;
unsigned long pi = 0, po = 0; /* progress for in and out */
bool miter_err;
@ -244,10 +240,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
return sun4i_ss_cipher_poll_fallback(areq);
if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) {
backup_iv = kzalloc(ivsize, GFP_KERNEL);
if (!backup_iv)
return -ENOMEM;
scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0);
scatterwalk_map_and_copy(ctx->backup_iv, areq->src,
areq->cryptlen - ivsize, ivsize, 0);
}
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) {
@ -384,8 +378,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
}
if (areq->iv) {
if (mode & SS_DECRYPTION) {
memcpy(areq->iv, backup_iv, ivsize);
kfree_sensitive(backup_iv);
memcpy(areq->iv, ctx->backup_iv, ivsize);
memzero_explicit(ctx->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize,
ivsize, 0);


@ -183,6 +183,7 @@ struct sun4i_tfm_ctx {
struct sun4i_cipher_req_ctx {
u32 mode;
u8 backup_iv[AES_BLOCK_SIZE];
struct skcipher_request fallback_req; // keep at the end
};
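/*
* backup_iv now lives in the per-request context rather than being
* kzalloc()ed for every decryption, which removes an -ENOMEM failure path
* from the data path; memzero_explicit() still wipes the saved IV after it
* has been copied back into areq->iv.
*/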


@ -25,26 +25,62 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct scatterlist *sg;
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
struct sun8i_ce_alg_template *algt;
unsigned int todo, len;
if (sg_nents(areq->src) > MAX_SG || sg_nents(areq->dst) > MAX_SG)
algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG ||
sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) {
algt->stat_fb_maxsg++;
return true;
}
if (areq->cryptlen < crypto_skcipher_ivsize(tfm))
if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) {
algt->stat_fb_leniv++;
return true;
}
if (areq->cryptlen == 0 || areq->cryptlen % 16)
if (areq->cryptlen == 0) {
algt->stat_fb_len0++;
return true;
}
if (areq->cryptlen % 16) {
algt->stat_fb_mod16++;
return true;
}
len = areq->cryptlen;
sg = areq->src;
while (sg) {
if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
algt->stat_fb_srcali++;
return true;
}
todo = min(len, sg->length);
if (todo % 4) {
algt->stat_fb_srclen++;
return true;
}
len -= todo;
sg = sg_next(sg);
}
len = areq->cryptlen;
sg = areq->dst;
while (sg) {
if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
algt->stat_fb_dstali++;
return true;
}
todo = min(len, sg->length);
if (todo % 4) {
algt->stat_fb_dstlen++;
return true;
}
len -= todo;
sg = sg_next(sg);
}
return false;
@ -94,6 +130,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
int nr_sgs = 0;
int nr_sgd = 0;
int err = 0;
int ns = sg_nents_for_len(areq->src, areq->cryptlen);
int nd = sg_nents_for_len(areq->dst, areq->cryptlen);
algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
@ -152,23 +190,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
ivsize = crypto_skcipher_ivsize(tfm);
if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
rctx->ivlen = ivsize;
rctx->bounce_iv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
if (!rctx->bounce_iv) {
err = -ENOMEM;
goto theend_key;
}
if (rctx->op_dir & CE_DECRYPTION) {
rctx->backup_iv = kzalloc(ivsize, GFP_KERNEL);
if (!rctx->backup_iv) {
err = -ENOMEM;
goto theend_key;
}
offset = areq->cryptlen - ivsize;
scatterwalk_map_and_copy(rctx->backup_iv, areq->src,
scatterwalk_map_and_copy(chan->backup_iv, areq->src,
offset, ivsize, 0);
}
memcpy(rctx->bounce_iv, areq->iv, ivsize);
rctx->addr_iv = dma_map_single(ce->dev, rctx->bounce_iv, rctx->ivlen,
memcpy(chan->bounce_iv, areq->iv, ivsize);
rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, rctx->ivlen,
DMA_TO_DEVICE);
if (dma_mapping_error(ce->dev, rctx->addr_iv)) {
dev_err(ce->dev, "Cannot DMA MAP IV\n");
@ -179,8 +207,7 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
}
if (areq->src == areq->dst) {
nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
DMA_BIDIRECTIONAL);
nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@ -188,15 +215,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
}
nr_sgd = nr_sgs;
} else {
nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
goto theend_iv;
}
nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst),
DMA_FROM_DEVICE);
nr_sgd = dma_map_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
if (nr_sgd <= 0 || nr_sgd > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd);
err = -EINVAL;
@ -241,14 +266,11 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
theend_sgs:
if (areq->src == areq->dst) {
dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
DMA_BIDIRECTIONAL);
dma_unmap_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL);
} else {
if (nr_sgs > 0)
dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst),
DMA_FROM_DEVICE);
dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
}
theend_iv:
@ -257,16 +279,15 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE);
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & CE_DECRYPTION) {
memcpy(areq->iv, rctx->backup_iv, ivsize);
kfree_sensitive(rctx->backup_iv);
memcpy(areq->iv, chan->backup_iv, ivsize);
memzero_explicit(chan->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
kfree(rctx->bounce_iv);
memzero_explicit(chan->bounce_iv, ivsize);
}
theend_key:
dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
theend:
@ -322,13 +343,13 @@ static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_r
dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE);
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & CE_DECRYPTION) {
memcpy(areq->iv, rctx->backup_iv, ivsize);
kfree_sensitive(rctx->backup_iv);
memcpy(areq->iv, chan->backup_iv, ivsize);
memzero_explicit(chan->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
kfree(rctx->bounce_iv);
memzero_explicit(chan->bounce_iv, ivsize);
}
dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
@ -398,10 +419,9 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) +
crypto_skcipher_reqsize(op->fallback_tfm);
dev_info(op->ce->dev, "Fallback for %s is %s\n",
crypto_tfm_alg_driver_name(&sktfm->base),
crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
memcpy(algt->fbname,
crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
CRYPTO_MAX_ALG_NAME);
op->enginectx.op.do_one_request = sun8i_ce_cipher_run;
op->enginectx.op.prepare_request = sun8i_ce_cipher_prepare;

View File

@ -283,7 +283,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@ -310,7 +310,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@ -336,7 +336,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@ -363,7 +363,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@ -595,19 +595,47 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
continue;
switch (ce_algs[i].type) {
case CRYPTO_ALG_TYPE_SKCIPHER:
seq_printf(seq, "%s %s %lu %lu\n",
seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
ce_algs[i].alg.skcipher.base.cra_driver_name,
ce_algs[i].alg.skcipher.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_fb);
seq_printf(seq, "\tLast fallback is: %s\n",
ce_algs[i].fbname);
seq_printf(seq, "\tFallback due to 0 length: %lu\n",
ce_algs[i].stat_fb_len0);
seq_printf(seq, "\tFallback due to length !mod16: %lu\n",
ce_algs[i].stat_fb_mod16);
seq_printf(seq, "\tFallback due to length < IV: %lu\n",
ce_algs[i].stat_fb_leniv);
seq_printf(seq, "\tFallback due to source alignment: %lu\n",
ce_algs[i].stat_fb_srcali);
seq_printf(seq, "\tFallback due to dest alignment: %lu\n",
ce_algs[i].stat_fb_dstali);
seq_printf(seq, "\tFallback due to source length: %lu\n",
ce_algs[i].stat_fb_srclen);
seq_printf(seq, "\tFallback due to dest length: %lu\n",
ce_algs[i].stat_fb_dstlen);
seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
ce_algs[i].stat_fb_maxsg);
break;
case CRYPTO_ALG_TYPE_AHASH:
seq_printf(seq, "%s %s %lu %lu\n",
seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
ce_algs[i].alg.hash.halg.base.cra_driver_name,
ce_algs[i].alg.hash.halg.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_fb);
seq_printf(seq, "\tLast fallback is: %s\n",
ce_algs[i].fbname);
seq_printf(seq, "\tFallback due to 0 length: %lu\n",
ce_algs[i].stat_fb_len0);
seq_printf(seq, "\tFallback due to length: %lu\n",
ce_algs[i].stat_fb_srclen);
seq_printf(seq, "\tFallback due to alignment: %lu\n",
ce_algs[i].stat_fb_srcali);
seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
ce_algs[i].stat_fb_maxsg);
break;
case CRYPTO_ALG_TYPE_RNG:
seq_printf(seq, "%s %s %lu %lu\n",
seq_printf(seq, "%s %s reqs=%lu bytes=%lu\n",
ce_algs[i].alg.rng.base.cra_driver_name,
ce_algs[i].alg.rng.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_bytes);
@ -673,6 +701,18 @@ static int sun8i_ce_allocate_chanlist(struct sun8i_ce_dev *ce)
err = -ENOMEM;
goto error_engine;
}
ce->chanlist[i].bounce_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ce->chanlist[i].bounce_iv) {
err = -ENOMEM;
goto error_engine;
}
ce->chanlist[i].backup_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE,
GFP_KERNEL);
if (!ce->chanlist[i].backup_iv) {
err = -ENOMEM;
goto error_engine;
}
}
return 0;
error_engine:


@ -50,9 +50,9 @@ int sun8i_ce_hash_crainit(struct crypto_tfm *tfm)
sizeof(struct sun8i_ce_hash_reqctx) +
crypto_ahash_reqsize(op->fallback_tfm));
dev_info(op->ce->dev, "Fallback for %s is %s\n",
crypto_tfm_alg_driver_name(tfm),
crypto_tfm_alg_driver_name(&op->fallback_tfm->base));
memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base),
CRYPTO_MAX_ALG_NAME);
err = pm_runtime_get_sync(op->ce->dev);
if (err < 0)
goto error_pm;
@ -199,17 +199,32 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq)
static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
struct sun8i_ce_alg_template *algt;
struct scatterlist *sg;
if (areq->nbytes == 0)
algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
if (areq->nbytes == 0) {
algt->stat_fb_len0++;
return true;
}
/* we need to reserve one SG for the padding one */
if (sg_nents(areq->src) > MAX_SG - 1)
if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) {
algt->stat_fb_maxsg++;
return true;
}
sg = areq->src;
while (sg) {
if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
if (sg->length % 4) {
algt->stat_fb_srclen++;
return true;
}
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
algt->stat_fb_srcali++;
return true;
}
sg = sg_next(sg);
}
return false;
@ -229,7 +244,7 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
if (sun8i_ce_hash_need_fallback(areq))
return sun8i_ce_hash_digest_fb(areq);
nr_sgs = sg_nents(areq->src);
nr_sgs = sg_nents_for_len(areq->src, areq->nbytes);
if (nr_sgs > MAX_SG - 1)
return sun8i_ce_hash_digest_fb(areq);
@ -248,6 +263,64 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
return crypto_transfer_hash_request_to_engine(engine, areq);
}
static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs)
{
u64 fill, min_fill, j, k;
__be64 *bebits;
__le64 *lebits;
j = padi;
buf[j++] = cpu_to_le32(0x80);
if (bs == 64) {
fill = 64 - (byte_count % 64);
min_fill = 2 * sizeof(u32) + sizeof(u32);
} else {
fill = 128 - (byte_count % 128);
min_fill = 4 * sizeof(u32) + sizeof(u32);
}
if (fill < min_fill)
fill += bs;
k = j;
j += (fill - min_fill) / sizeof(u32);
if (j * 4 > bufsize) {
pr_err("%s OVERFLOW %llu\n", __func__, j);
return 0;
}
for (; k < j; k++)
buf[k] = 0;
if (le) {
/* MD5 */
lebits = (__le64 *)&buf[j];
*lebits = cpu_to_le64(byte_count << 3);
j += 2;
} else {
if (bs == 64) {
/* sha1 sha224 sha256 */
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
} else {
/* sha384 sha512 */
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count >> 61);
j += 2;
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
}
}
if (j * 4 > bufsize) {
pr_err("%s OVERFLOW %llu\n", __func__, j);
return 0;
}
return j;
}
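/*
* hash_pad() appends the standard MD5/SHA trailer to the buffer starting at
* word index padi: the 0x80 terminator, zero fill up to the length field,
* then the message length in bits (little-endian for MD5, big-endian for
* SHA, with a 128-bit field for the 128-byte block sizes). It returns the
* new word index, or 0 if the padding would overflow bufsize.
*/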
int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
{
struct ahash_request *areq = container_of(breq, struct ahash_request, base);
@ -266,14 +339,11 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
__le32 *bf;
void *buf = NULL;
int j, i, todo;
int nbw = 0;
u64 fill, min_fill;
__be64 *bebits;
__le64 *lebits;
void *result = NULL;
u64 bs;
int digestsize;
dma_addr_t addr_res, addr_pad;
int ns = sg_nents_for_len(areq->src, areq->nbytes);
algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
ce = algt->ce;
@ -318,7 +388,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
cet->t_sym_ctl = 0;
cet->t_asym_ctl = 0;
nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@ -348,44 +418,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
byte_count = areq->nbytes;
j = 0;
bf[j++] = cpu_to_le32(0x80);
if (bs == 64) {
fill = 64 - (byte_count % 64);
min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
} else {
fill = 128 - (byte_count % 128);
min_fill = 4 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
}
if (fill < min_fill)
fill += bs;
j += (fill - min_fill) / sizeof(u32);
switch (algt->ce_algo_id) {
case CE_ID_HASH_MD5:
lebits = (__le64 *)&bf[j];
*lebits = cpu_to_le64(byte_count << 3);
j += 2;
j = hash_pad(bf, 2 * bs, j, byte_count, true, bs);
break;
case CE_ID_HASH_SHA1:
case CE_ID_HASH_SHA224:
case CE_ID_HASH_SHA256:
bebits = (__be64 *)&bf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
j = hash_pad(bf, 2 * bs, j, byte_count, false, bs);
break;
case CE_ID_HASH_SHA384:
case CE_ID_HASH_SHA512:
bebits = (__be64 *)&bf[j];
*bebits = cpu_to_be64(byte_count >> 61);
j += 2;
bebits = (__be64 *)&bf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
j = hash_pad(bf, 2 * bs, j, byte_count, false, bs);
break;
}
if (!j) {
err = -EINVAL;
goto theend;
}
addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE);
cet->t_src[i].addr = cpu_to_le32(addr_pad);
@ -406,8 +457,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(areq->base.tfm));
dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE);
dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE);


@ -108,11 +108,9 @@ int sun8i_ce_prng_generate(struct crypto_rng *tfm, const u8 *src,
goto err_dst;
}
err = pm_runtime_get_sync(ce->dev);
if (err < 0) {
pm_runtime_put_noidle(ce->dev);
err = pm_runtime_resume_and_get(ce->dev);
if (err < 0)
goto err_pm;
}
mutex_lock(&ce->rnglock);
chan = &ce->chanlist[flow];


@ -186,6 +186,8 @@ struct ce_task {
* @status: set to 1 by interrupt if task is done
* @t_phy: Physical address of task
* @tl: pointer to the current ce_task for this flow
* @backup_iv: buffer which contains the next IV to store
* @bounce_iv: buffer which contains the IV
* @stat_req: number of request done by this flow
*/
struct sun8i_ce_flow {
@ -195,6 +197,8 @@ struct sun8i_ce_flow {
dma_addr_t t_phy;
int timeout;
struct ce_task *tl;
void *backup_iv;
void *bounce_iv;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
unsigned long stat_req;
#endif
@ -241,8 +245,6 @@ struct sun8i_ce_dev {
* struct sun8i_cipher_req_ctx - context for a skcipher request
* @op_dir: direction (encrypt vs decrypt) for this request
* @flow: the flow to use for this request
* @backup_iv: buffer which contains the next IV to store
* @bounce_iv: buffer which contains the IV
* @ivlen: size of bounce_iv
* @nr_sgs: The number of source SG (as given by dma_map_sg())
* @nr_sgd: The number of destination SG (as given by dma_map_sg())
@ -253,8 +255,6 @@ struct sun8i_ce_dev {
struct sun8i_cipher_req_ctx {
u32 op_dir;
int flow;
void *backup_iv;
void *bounce_iv;
unsigned int ivlen;
int nr_sgs;
int nr_sgd;
@ -333,11 +333,18 @@ struct sun8i_ce_alg_template {
struct ahash_alg hash;
struct rng_alg rng;
} alg;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
unsigned long stat_req;
unsigned long stat_fb;
unsigned long stat_bytes;
#endif
unsigned long stat_fb_maxsg;
unsigned long stat_fb_leniv;
unsigned long stat_fb_len0;
unsigned long stat_fb_mod16;
unsigned long stat_fb_srcali;
unsigned long stat_fb_srclen;
unsigned long stat_fb_dstali;
unsigned long stat_fb_dstlen;
char fbname[CRYPTO_MAX_ALG_NAME];
};
int sun8i_ce_enqueue(struct crypto_async_request *areq, u32 type);


@ -22,34 +22,53 @@
static bool sun8i_ss_need_fallback(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
struct sun8i_ss_alg_template *algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
struct scatterlist *in_sg = areq->src;
struct scatterlist *out_sg = areq->dst;
struct scatterlist *sg;
unsigned int todo, len;
if (areq->cryptlen == 0 || areq->cryptlen % 16)
if (areq->cryptlen == 0 || areq->cryptlen % 16) {
algt->stat_fb_len++;
return true;
}
if (sg_nents(areq->src) > 8 || sg_nents(areq->dst) > 8)
if (sg_nents_for_len(areq->src, areq->cryptlen) > 8 ||
sg_nents_for_len(areq->dst, areq->cryptlen) > 8) {
algt->stat_fb_sgnum++;
return true;
}
len = areq->cryptlen;
sg = areq->src;
while (sg) {
if ((sg->length % 16) != 0)
todo = min(len, sg->length);
if ((todo % 16) != 0) {
algt->stat_fb_sglen++;
return true;
if ((sg_dma_len(sg) % 16) != 0)
return true;
if (!IS_ALIGNED(sg->offset, 16))
}
if (!IS_ALIGNED(sg->offset, 16)) {
algt->stat_fb_align++;
return true;
}
len -= todo;
sg = sg_next(sg);
}
len = areq->cryptlen;
sg = areq->dst;
while (sg) {
if ((sg->length % 16) != 0)
todo = min(len, sg->length);
if ((todo % 16) != 0) {
algt->stat_fb_sglen++;
return true;
if ((sg_dma_len(sg) % 16) != 0)
return true;
if (!IS_ALIGNED(sg->offset, 16))
}
if (!IS_ALIGNED(sg->offset, 16)) {
algt->stat_fb_align++;
return true;
}
len -= todo;
sg = sg_next(sg);
}
@ -93,6 +112,68 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
return err;
}
static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
struct sun8i_ss_dev *ss = op->ss;
struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
struct scatterlist *sg = areq->src;
unsigned int todo, offset;
unsigned int len = areq->cryptlen;
unsigned int ivsize = crypto_skcipher_ivsize(tfm);
struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
int i = 0;
u32 a;
int err;
rctx->ivlen = ivsize;
if (rctx->op_dir & SS_DECRYPTION) {
offset = areq->cryptlen - ivsize;
scatterwalk_map_and_copy(sf->biv, areq->src, offset,
ivsize, 0);
}
/* we need to copy all IVs from source in case DMA is bi-directional */
while (sg && len) {
if (sg_dma_len(sg) == 0) {
sg = sg_next(sg);
continue;
}
if (i == 0)
memcpy(sf->iv[0], areq->iv, ivsize);
a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, a)) {
memzero_explicit(sf->iv[i], ivsize);
dev_err(ss->dev, "Cannot DMA MAP IV\n");
err = -EFAULT;
goto dma_iv_error;
}
rctx->p_iv[i] = a;
/* we need to set up all the other IVs only in the decrypt direction */
if (rctx->op_dir & SS_ENCRYPTION)
return 0;
todo = min(len, sg_dma_len(sg));
len -= todo;
i++;
if (i < MAX_SG) {
offset = sg->length - ivsize;
scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0);
}
rctx->niv = i;
sg = sg_next(sg);
}
return 0;
dma_iv_error:
i--;
while (i >= 0) {
dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
memzero_explicit(sf->iv[i], ivsize);
i--;
}
return err;
}
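/*
* For multi-entry scatterlists the hardware needs one IV per SG entry: when
* decrypting, the IV of each entry is the last ciphertext block of the
* previous one, so those blocks are copied into sf->iv[] (and the final
* block into sf->biv) before the engine runs, since a bi-directional DMA
* mapping of an in-place request could overwrite them.
*/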
static int sun8i_ss_cipher(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
@ -101,12 +182,14 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
struct sun8i_ss_alg_template *algt;
struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
struct scatterlist *sg;
unsigned int todo, len, offset, ivsize;
void *backup_iv = NULL;
int nr_sgs = 0;
int nr_sgd = 0;
int err = 0;
int nsgs = sg_nents_for_len(areq->src, areq->cryptlen);
int nsgd = sg_nents_for_len(areq->dst, areq->cryptlen);
int i;
algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
@ -134,34 +217,12 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
ivsize = crypto_skcipher_ivsize(tfm);
if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
rctx->ivlen = ivsize;
rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
if (!rctx->biv) {
err = -ENOMEM;
err = sun8i_ss_setup_ivs(areq);
if (err)
goto theend_key;
}
if (rctx->op_dir & SS_DECRYPTION) {
backup_iv = kzalloc(ivsize, GFP_KERNEL);
if (!backup_iv) {
err = -ENOMEM;
goto theend_key;
}
offset = areq->cryptlen - ivsize;
scatterwalk_map_and_copy(backup_iv, areq->src, offset,
ivsize, 0);
}
memcpy(rctx->biv, areq->iv, ivsize);
rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen,
DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, rctx->p_iv)) {
dev_err(ss->dev, "Cannot DMA MAP IV\n");
err = -ENOMEM;
goto theend_iv;
}
}
if (areq->src == areq->dst) {
nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_BIDIRECTIONAL);
nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL);
if (nr_sgs <= 0 || nr_sgs > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@ -169,15 +230,13 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
}
nr_sgd = nr_sgs;
} else {
nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
goto theend_iv;
}
nr_sgd = dma_map_sg(ss->dev, areq->dst, sg_nents(areq->dst),
DMA_FROM_DEVICE);
nr_sgd = dma_map_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE);
if (nr_sgd <= 0 || nr_sgd > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgd);
err = -EINVAL;
@ -233,31 +292,26 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
theend_sgs:
if (areq->src == areq->dst) {
dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_BIDIRECTIONAL);
dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL);
} else {
dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
dma_unmap_sg(ss->dev, areq->dst, sg_nents(areq->dst),
DMA_FROM_DEVICE);
dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE);
dma_unmap_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE);
}
theend_iv:
if (rctx->p_iv)
dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen,
DMA_TO_DEVICE);
if (areq->iv && ivsize > 0) {
if (rctx->biv) {
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & SS_DECRYPTION) {
memcpy(areq->iv, backup_iv, ivsize);
kfree_sensitive(backup_iv);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
kfree(rctx->biv);
for (i = 0; i < rctx->niv; i++) {
dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
memzero_explicit(sf->iv[i], ivsize);
}
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & SS_DECRYPTION) {
memcpy(areq->iv, sf->biv, ivsize);
memzero_explicit(sf->biv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
}
@ -349,9 +403,9 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
crypto_skcipher_reqsize(op->fallback_tfm);
dev_info(op->ss->dev, "Fallback for %s is %s\n",
crypto_tfm_alg_driver_name(&sktfm->base),
crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
memcpy(algt->fbname,
crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
CRYPTO_MAX_ALG_NAME);
op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
op->enginectx.op.prepare_request = NULL;


@ -66,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
const char *name)
{
int flow = rctx->flow;
unsigned int ivlen = rctx->ivlen;
u32 v = SS_START;
int i;
@ -104,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
mutex_lock(&ss->mlock);
writel(rctx->p_key, ss->base + SS_KEY_ADR_REG);
if (i == 0) {
if (rctx->p_iv)
writel(rctx->p_iv, ss->base + SS_IV_ADR_REG);
} else {
if (rctx->biv) {
if (rctx->op_dir == SS_ENCRYPTION)
writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
if (ivlen) {
if (rctx->op_dir == SS_ENCRYPTION) {
if (i == 0)
writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG);
else
writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG);
} else {
writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG);
}
}
@ -409,6 +409,37 @@ static struct sun8i_ss_alg_template ss_algs[] = {
}
}
},
{ .type = CRYPTO_ALG_TYPE_AHASH,
.ss_algo_id = SS_ID_HASH_SHA1,
.alg.hash = {
.init = sun8i_ss_hash_init,
.update = sun8i_ss_hash_update,
.final = sun8i_ss_hash_final,
.finup = sun8i_ss_hash_finup,
.digest = sun8i_ss_hash_digest,
.export = sun8i_ss_hash_export,
.import = sun8i_ss_hash_import,
.setkey = sun8i_ss_hmac_setkey,
.halg = {
.digestsize = SHA1_DIGEST_SIZE,
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "hmac(sha1)",
.cra_driver_name = "hmac-sha1-sun8i-ss",
.cra_priority = 300,
.cra_alignmask = 3,
.cra_flags = CRYPTO_ALG_TYPE_AHASH |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
.cra_module = THIS_MODULE,
.cra_init = sun8i_ss_hash_crainit,
.cra_exit = sun8i_ss_hash_craexit,
}
}
}
},
#endif
};
@ -430,6 +461,17 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
ss_algs[i].alg.skcipher.base.cra_driver_name,
ss_algs[i].alg.skcipher.base.cra_name,
ss_algs[i].stat_req, ss_algs[i].stat_fb);
seq_printf(seq, "\tLast fallback is: %s\n",
ss_algs[i].fbname);
seq_printf(seq, "\tFallback due to length: %lu\n",
ss_algs[i].stat_fb_len);
seq_printf(seq, "\tFallback due to SG length: %lu\n",
ss_algs[i].stat_fb_sglen);
seq_printf(seq, "\tFallback due to alignment: %lu\n",
ss_algs[i].stat_fb_align);
seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
ss_algs[i].stat_fb_sgnum);
break;
case CRYPTO_ALG_TYPE_RNG:
seq_printf(seq, "%s %s reqs=%lu tsize=%lu\n",
@ -442,6 +484,16 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
ss_algs[i].alg.hash.halg.base.cra_driver_name,
ss_algs[i].alg.hash.halg.base.cra_name,
ss_algs[i].stat_req, ss_algs[i].stat_fb);
seq_printf(seq, "\tLast fallback is: %s\n",
ss_algs[i].fbname);
seq_printf(seq, "\tFallback due to length: %lu\n",
ss_algs[i].stat_fb_len);
seq_printf(seq, "\tFallback due to SG length: %lu\n",
ss_algs[i].stat_fb_sglen);
seq_printf(seq, "\tFallback due to alignment: %lu\n",
ss_algs[i].stat_fb_align);
seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
ss_algs[i].stat_fb_sgnum);
break;
}
}
@ -464,7 +516,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i)
*/
static int allocate_flows(struct sun8i_ss_dev *ss)
{
int i, err;
int i, j, err;
ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow),
GFP_KERNEL);
@ -474,6 +526,28 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
for (i = 0; i < MAXFLOW; i++) {
init_completion(&ss->flows[i].complete);
ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].biv)
goto error_engine;
for (j = 0; j < MAX_SG; j++) {
ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].iv[j])
goto error_engine;
}
/* the padding could be up to two blocks. */
ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].pad)
goto error_engine;
ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].result)
goto error_engine;
ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
if (!ss->flows[i].engine) {
dev_err(ss->dev, "Cannot allocate engine\n");

View File

@ -14,11 +14,99 @@
#include <linux/pm_runtime.h>
#include <linux/scatterlist.h>
#include <crypto/internal/hash.h>
#include <crypto/hmac.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <crypto/md5.h>
#include "sun8i-ss.h"
static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
unsigned int keylen)
{
struct crypto_shash *xtfm;
struct shash_desc *sdesc;
size_t len;
int ret = 0;
xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(xtfm))
return PTR_ERR(xtfm);
len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
sdesc = kmalloc(len, GFP_KERNEL);
if (!sdesc) {
ret = -ENOMEM;
goto err_hashkey_sdesc;
}
sdesc->tfm = xtfm;
ret = crypto_shash_init(sdesc);
if (ret) {
dev_err(tfmctx->ss->dev, "shash init error ret=%d\n", ret);
goto err_hashkey;
}
ret = crypto_shash_finup(sdesc, key, keylen, tfmctx->key);
if (ret)
dev_err(tfmctx->ss->dev, "shash finup error\n");
err_hashkey:
kfree(sdesc);
err_hashkey_sdesc:
crypto_free_shash(xtfm);
return ret;
}
int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
unsigned int keylen)
{
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(ahash);
struct ahash_alg *alg = __crypto_ahash_alg(ahash->base.__crt_alg);
struct sun8i_ss_alg_template *algt;
int digestsize, i;
int bs = crypto_ahash_blocksize(ahash);
int ret;
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
digestsize = algt->alg.hash.halg.digestsize;
if (keylen > bs) {
ret = sun8i_ss_hashkey(tfmctx, key, keylen);
if (ret)
return ret;
tfmctx->keylen = digestsize;
} else {
tfmctx->keylen = keylen;
memcpy(tfmctx->key, key, keylen);
}
tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
if (!tfmctx->ipad)
return -ENOMEM;
tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
if (!tfmctx->opad) {
ret = -ENOMEM;
goto err_opad;
}
memset(tfmctx->key + tfmctx->keylen, 0, bs - tfmctx->keylen);
memcpy(tfmctx->ipad, tfmctx->key, tfmctx->keylen);
memcpy(tfmctx->opad, tfmctx->key, tfmctx->keylen);
for (i = 0; i < bs; i++) {
tfmctx->ipad[i] ^= HMAC_IPAD_VALUE;
tfmctx->opad[i] ^= HMAC_OPAD_VALUE;
}
ret = crypto_ahash_setkey(tfmctx->fallback_tfm, key, keylen);
if (!ret)
return 0;
memzero_explicit(tfmctx->key, keylen);
kfree_sensitive(tfmctx->opad);
err_opad:
kfree_sensitive(tfmctx->ipad);
return ret;
}
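
For context, the setkey above implements the standard HMAC key preparation: an over-long key is first hashed, the (at most block-sized) key is zero-padded, and ipad/opad are derived by XORing with the 0x36/0x5c constants from <crypto/hmac.h>, so the hardware can later compute H((K ^ opad) || H((K ^ ipad) || msg)) in two passes. A minimal standalone sketch of the pad derivation (illustrative userspace C, not driver code), assuming the key has already been reduced to at most one block:

#include <stdint.h>
#include <string.h>

#define HMAC_IPAD_VALUE 0x36	/* same constants as <crypto/hmac.h> */
#define HMAC_OPAD_VALUE 0x5c

/* key must already be at most bs bytes (longer keys are hashed first) */
static void hmac_derive_pads(const uint8_t *key, size_t keylen, size_t bs,
			     uint8_t *ipad, uint8_t *opad)
{
	size_t i;

	memset(ipad, 0, bs);
	memcpy(ipad, key, keylen);	/* zero-padded key */
	memcpy(opad, ipad, bs);
	for (i = 0; i < bs; i++) {
		ipad[i] ^= HMAC_IPAD_VALUE;
		opad[i] ^= HMAC_OPAD_VALUE;
	}
}
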
int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
{
struct sun8i_ss_hash_tfm_ctx *op = crypto_tfm_ctx(tfm);
@ -50,9 +138,8 @@ int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
sizeof(struct sun8i_ss_hash_reqctx) +
crypto_ahash_reqsize(op->fallback_tfm));
dev_info(op->ss->dev, "Fallback for %s is %s\n",
crypto_tfm_alg_driver_name(tfm),
crypto_tfm_alg_driver_name(&op->fallback_tfm->base));
memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base), CRYPTO_MAX_ALG_NAME);
err = pm_runtime_get_sync(op->ss->dev);
if (err < 0)
goto error_pm;
@ -67,6 +154,9 @@ void sun8i_ss_hash_craexit(struct crypto_tfm *tfm)
{
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_tfm_ctx(tfm);
kfree_sensitive(tfmctx->ipad);
kfree_sensitive(tfmctx->opad);
crypto_free_ahash(tfmctx->fallback_tfm);
pm_runtime_put_sync_suspend(tfmctx->ss->dev);
}
@ -258,23 +348,48 @@ static int sun8i_ss_run_hash_task(struct sun8i_ss_dev *ss,
static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
struct sun8i_ss_alg_template *algt;
struct scatterlist *sg;
if (areq->nbytes == 0)
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
if (areq->nbytes == 0) {
algt->stat_fb_len++;
return true;
}
if (areq->nbytes >= MAX_PAD_SIZE - 64) {
algt->stat_fb_len++;
return true;
}
/* we need to reserve one SG for the padding one */
if (sg_nents(areq->src) > MAX_SG - 1)
if (sg_nents(areq->src) > MAX_SG - 1) {
algt->stat_fb_sgnum++;
return true;
}
sg = areq->src;
while (sg) {
/* SS can operate hash only on full block size
* since SS support only MD5,sha1,sha224 and sha256, blocksize
* is always 64
* TODO: handle request if last SG is not len%64
* but this will need to copy data on a new SG of size=64
*/
if (sg->length % 64 || !IS_ALIGNED(sg->offset, sizeof(u32)))
/* Only the last block could be bounced to the pad buffer */
if (sg->length % 64 && sg_next(sg)) {
algt->stat_fb_sglen++;
return true;
}
if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
algt->stat_fb_align++;
return true;
}
if (sg->length % 4) {
algt->stat_fb_sglen++;
return true;
}
sg = sg_next(sg);
}
return false;
@ -288,21 +403,11 @@ int sun8i_ss_hash_digest(struct ahash_request *areq)
struct sun8i_ss_alg_template *algt;
struct sun8i_ss_dev *ss;
struct crypto_engine *engine;
struct scatterlist *sg;
int nr_sgs, e, i;
int e;
if (sun8i_ss_hash_need_fallback(areq))
return sun8i_ss_hash_digest_fb(areq);
nr_sgs = sg_nents(areq->src);
if (nr_sgs > MAX_SG - 1)
return sun8i_ss_hash_digest_fb(areq);
for_each_sg(areq->src, sg, nr_sgs, i) {
if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
return sun8i_ss_hash_digest_fb(areq);
}
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
ss = algt->ss;
@ -313,6 +418,64 @@ int sun8i_ss_hash_digest(struct ahash_request *areq)
return crypto_transfer_hash_request_to_engine(engine, areq);
}
static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs)
{
u64 fill, min_fill, j, k;
__be64 *bebits;
__le64 *lebits;
j = padi;
buf[j++] = cpu_to_le32(0x80);
if (bs == 64) {
fill = 64 - (byte_count % 64);
min_fill = 2 * sizeof(u32) + sizeof(u32);
} else {
fill = 128 - (byte_count % 128);
min_fill = 4 * sizeof(u32) + sizeof(u32);
}
if (fill < min_fill)
fill += bs;
k = j;
j += (fill - min_fill) / sizeof(u32);
if (j * 4 > bufsize) {
pr_err("%s OVERFLOW %llu\n", __func__, j);
return 0;
}
for (; k < j; k++)
buf[k] = 0;
if (le) {
/* MD5 */
lebits = (__le64 *)&buf[j];
*lebits = cpu_to_le64(byte_count << 3);
j += 2;
} else {
if (bs == 64) {
/* sha1 sha224 sha256 */
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
} else {
/* sha384 sha512 */
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count >> 61);
j += 2;
bebits = (__be64 *)&buf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
}
}
if (j * 4 > bufsize) {
pr_err("%s OVERFLOW %llu\n", __func__, j);
return 0;
}
return j;
}
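
The size arithmetic above can be checked in isolation. Below is a standalone sketch (plain userspace C, not driver code) of the 64-byte block case: since the 0x80 end-of-message marker is written as a whole 32-bit word here, at least 12 bytes (marker word plus 64-bit bit-length) must remain in the final block, otherwise the padding spills into one extra block.

#include <assert.h>
#include <stdint.h>

/* Mirrors the hash_pad() sizing above for bs == 64; returns pad bytes. */
static uint64_t pad_bytes_64(uint64_t byte_count)
{
	uint64_t fill = 64 - (byte_count % 64);

	if (fill < 12)	/* 4-byte 0x80 word + 8-byte bit length */
		fill += 64;
	return fill;
}

int main(void)
{
	assert(pad_bytes_64(0)  == 64);	/* block-aligned data: one full pad block */
	assert(pad_bytes_64(52) == 12);	/* tightest fit inside the same block */
	assert(pad_bytes_64(53) == 75);	/* marker and length spill to a new block */
	return 0;
}
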
/* sun8i_ss_hash_run - run an ahash request
* Send the data of the request to the SS along with an extra SG with padding
*/
@ -320,20 +483,26 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
{
struct ahash_request *areq = container_of(breq, struct ahash_request, base);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
struct sun8i_ss_alg_template *algt;
struct sun8i_ss_dev *ss;
struct scatterlist *sg;
int bs = crypto_ahash_blocksize(tfm);
int nr_sgs, err, digestsize;
unsigned int len;
u64 fill, min_fill, byte_count;
u64 byte_count;
void *pad, *result;
int j, i, todo;
__be64 *bebits;
__le64 *lebits;
dma_addr_t addr_res, addr_pad;
int j, i, k, todo;
dma_addr_t addr_res, addr_pad, addr_xpad;
__le32 *bf;
/* HMAC step:
* 0: normal hashing
* 1: IPAD
* 2: OPAD
*/
int hmac = 0;
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
ss = algt->ss;
@ -342,18 +511,10 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
if (digestsize == SHA224_DIGEST_SIZE)
digestsize = SHA256_DIGEST_SIZE;
/* the padding could be up to two blocks. */
pad = kzalloc(algt->alg.hash.halg.base.cra_blocksize * 2, GFP_KERNEL | GFP_DMA);
if (!pad)
return -ENOMEM;
result = ss->flows[rctx->flow].result;
pad = ss->flows[rctx->flow].pad;
bf = (__le32 *)pad;
result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA);
if (!result) {
kfree(pad);
return -ENOMEM;
}
for (i = 0; i < MAX_SG; i++) {
rctx->t_dst[i].addr = 0;
rctx->t_dst[i].len = 0;
@ -376,17 +537,33 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
if (dma_mapping_error(ss->dev, addr_res)) {
dev_err(ss->dev, "DMA map dest\n");
err = -EINVAL;
goto theend;
goto err_dma_result;
}
j = 0;
len = areq->nbytes;
for_each_sg(areq->src, sg, nr_sgs, i) {
rctx->t_src[i].addr = sg_dma_address(sg);
sg = areq->src;
i = 0;
while (len > 0 && sg) {
if (sg_dma_len(sg) == 0) {
sg = sg_next(sg);
continue;
}
todo = min(len, sg_dma_len(sg));
rctx->t_src[i].len = todo / 4;
len -= todo;
rctx->t_dst[i].addr = addr_res;
rctx->t_dst[i].len = digestsize / 4;
/* only the last SG may have a length that is not a multiple of 64 */
if (todo % 64 == 0) {
rctx->t_src[i].addr = sg_dma_address(sg);
rctx->t_src[i].len = todo / 4;
rctx->t_dst[i].addr = addr_res;
rctx->t_dst[i].len = digestsize / 4;
len -= todo;
} else {
scatterwalk_map_and_copy(bf, sg, 0, todo, 0);
j += todo / 4;
len -= todo;
}
sg = sg_next(sg);
i++;
}
if (len > 0) {
dev_err(ss->dev, "remaining len %d\n", len);
@ -394,55 +571,135 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
goto theend;
}
if (j > 0)
i--;
retry:
byte_count = areq->nbytes;
j = 0;
bf[j++] = cpu_to_le32(0x80);
if (tfmctx->keylen && hmac == 0) {
hmac = 1;
/* shift all SG one slot up, to free slot 0 for IPAD */
for (k = 6; k >= 0; k--) {
rctx->t_src[k + 1].addr = rctx->t_src[k].addr;
rctx->t_src[k + 1].len = rctx->t_src[k].len;
rctx->t_dst[k + 1].addr = rctx->t_dst[k].addr;
rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
}
addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, addr_xpad)) {
dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
goto err_dma_xpad;
}
rctx->t_src[0].addr = addr_xpad;
rctx->t_src[0].len = bs / 4;
rctx->t_dst[0].addr = addr_res;
rctx->t_dst[0].len = digestsize / 4;
i++;
byte_count = areq->nbytes + bs;
}
if (tfmctx->keylen && hmac == 2) {
for (i = 0; i < MAX_SG; i++) {
rctx->t_src[i].addr = 0;
rctx->t_src[i].len = 0;
rctx->t_dst[i].addr = 0;
rctx->t_dst[i].len = 0;
}
fill = 64 - (byte_count % 64);
min_fill = 3 * sizeof(u32);
addr_res = dma_map_single(ss->dev, result, digestsize, DMA_FROM_DEVICE);
if (dma_mapping_error(ss->dev, addr_res)) {
dev_err(ss->dev, "Fail to create DMA mapping of result\n");
err = -EINVAL;
goto err_dma_result;
}
addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, addr_xpad)) {
dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
goto err_dma_xpad;
}
rctx->t_src[0].addr = addr_xpad;
rctx->t_src[0].len = bs / 4;
if (fill < min_fill)
fill += 64;
memcpy(bf, result, digestsize);
j = digestsize / 4;
i = 1;
byte_count = digestsize + bs;
j += (fill - min_fill) / sizeof(u32);
rctx->t_dst[0].addr = addr_res;
rctx->t_dst[0].len = digestsize / 4;
}
switch (algt->ss_algo_id) {
case SS_ID_HASH_MD5:
lebits = (__le64 *)&bf[j];
*lebits = cpu_to_le64(byte_count << 3);
j += 2;
j = hash_pad(bf, 4096, j, byte_count, true, bs);
break;
case SS_ID_HASH_SHA1:
case SS_ID_HASH_SHA224:
case SS_ID_HASH_SHA256:
bebits = (__be64 *)&bf[j];
*bebits = cpu_to_be64(byte_count << 3);
j += 2;
j = hash_pad(bf, 4096, j, byte_count, false, bs);
break;
}
addr_pad = dma_map_single(ss->dev, pad, j * 4, DMA_TO_DEVICE);
rctx->t_src[i].addr = addr_pad;
rctx->t_src[i].len = j;
rctx->t_dst[i].addr = addr_res;
rctx->t_dst[i].len = digestsize / 4;
if (dma_mapping_error(ss->dev, addr_pad)) {
dev_err(ss->dev, "DMA error on padding SG\n");
if (!j) {
err = -EINVAL;
goto theend;
}
addr_pad = dma_map_single(ss->dev, pad, j * 4, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, addr_pad)) {
dev_err(ss->dev, "DMA error on padding SG\n");
err = -EINVAL;
goto err_dma_pad;
}
rctx->t_src[i].addr = addr_pad;
rctx->t_src[i].len = j;
rctx->t_dst[i].addr = addr_res;
rctx->t_dst[i].len = digestsize / 4;
err = sun8i_ss_run_hash_task(ss, rctx, crypto_tfm_alg_name(areq->base.tfm));
dma_unmap_single(ss->dev, addr_pad, j * 4, DMA_TO_DEVICE);
dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
dma_unmap_single(ss->dev, addr_res, digestsize, DMA_FROM_DEVICE);
/*
* mini helper for checking dma map/unmap
* flow start for hmac = 0 (and HMAC = 1)
* HMAC = 0
* MAP src
* MAP res
*
* retry:
* if hmac then hmac = 1
* MAP xpad (ipad)
* if hmac == 2
* MAP res
* MAP xpad (opad)
* MAP pad
* ACTION!
* UNMAP pad
* if hmac
* UNMAP xpad
* UNMAP res
* if hmac < 2
* UNMAP SRC
*
* if hmac = 1 then hmac = 2 goto retry
*/
memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
dma_unmap_single(ss->dev, addr_pad, j * 4, DMA_TO_DEVICE);
err_dma_pad:
if (hmac > 0)
dma_unmap_single(ss->dev, addr_xpad, bs, DMA_TO_DEVICE);
err_dma_xpad:
dma_unmap_single(ss->dev, addr_res, digestsize, DMA_FROM_DEVICE);
err_dma_result:
if (hmac < 2)
dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
if (hmac == 1 && !err) {
hmac = 2;
goto retry;
}
if (!err)
memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
theend:
kfree(pad);
kfree(result);
local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
local_bh_enable();


@ -112,11 +112,9 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
goto err_iv;
}
err = pm_runtime_get_sync(ss->dev);
if (err < 0) {
pm_runtime_put_noidle(ss->dev);
err = pm_runtime_resume_and_get(ss->dev);
if (err < 0)
goto err_pm;
}
err = 0;
mutex_lock(&ss->mlock);


@ -82,6 +82,8 @@
#define PRNG_DATA_SIZE (160 / 8)
#define PRNG_SEED_SIZE DIV_ROUND_UP(175, 8)
#define MAX_PAD_SIZE 4096
/*
* struct ss_clock - Describe clocks used by sun8i-ss
* @name: Name of clock needed by this variant
@ -121,11 +123,19 @@ struct sginfo {
* @complete: completion for the current task on this flow
* @status: set to 1 by interrupt if task is done
* @stat_req: number of request done by this flow
* @iv: list of IV to use for each step
* @biv: buffer which contains the backed-up IV
* @pad: padding buffer for hash operations
* @result: buffer for storing the result of hash operations
*/
struct sun8i_ss_flow {
struct crypto_engine *engine;
struct completion complete;
int status;
u8 *iv[MAX_SG];
u8 *biv;
void *pad;
void *result;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
unsigned long stat_req;
#endif
@ -164,28 +174,28 @@ struct sun8i_ss_dev {
* @t_src: list of mapped SGs with their size
* @t_dst: list of mapped SGs with their size
* @p_key: DMA address of the key
* @p_iv: DMA address of the IV
* @p_iv: DMA address of the IVs
* @niv: Number of IVs DMA mapped
* @method: current algorithm for this request
* @op_mode: op_mode for this request
* @op_dir: direction (encrypt vs decrypt) for this request
* @flow: the flow to use for this request
* @ivlen: size of biv
* @ivlen: size of IVs
* @keylen: keylen for this request
* @biv: buffer which contain the IV
* @fallback_req: request struct for invoking the fallback skcipher TFM
*/
struct sun8i_cipher_req_ctx {
struct sginfo t_src[MAX_SG];
struct sginfo t_dst[MAX_SG];
u32 p_key;
u32 p_iv;
u32 p_iv[MAX_SG];
int niv;
u32 method;
u32 op_mode;
u32 op_dir;
int flow;
unsigned int ivlen;
unsigned int keylen;
void *biv;
struct skcipher_request fallback_req; // keep at the end
};
@ -229,6 +239,10 @@ struct sun8i_ss_hash_tfm_ctx {
struct crypto_engine_ctx enginectx;
struct crypto_ahash *fallback_tfm;
struct sun8i_ss_dev *ss;
u8 *ipad;
u8 *opad;
u8 key[SHA256_BLOCK_SIZE];
int keylen;
};
/*
@ -269,11 +283,14 @@ struct sun8i_ss_alg_template {
struct rng_alg rng;
struct ahash_alg hash;
} alg;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
unsigned long stat_req;
unsigned long stat_fb;
unsigned long stat_bytes;
#endif
unsigned long stat_fb_len;
unsigned long stat_fb_sglen;
unsigned long stat_fb_align;
unsigned long stat_fb_sgnum;
char fbname[CRYPTO_MAX_ALG_NAME];
};
int sun8i_ss_enqueue(struct crypto_async_request *areq, u32 type);
@ -306,3 +323,5 @@ int sun8i_ss_hash_update(struct ahash_request *areq);
int sun8i_ss_hash_finup(struct ahash_request *areq);
int sun8i_ss_hash_digest(struct ahash_request *areq);
int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq);
int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
unsigned int keylen);


@ -398,7 +398,7 @@ static int __init atmel_ecc_init(void)
static void __exit atmel_ecc_exit(void)
{
flush_scheduled_work();
atmel_i2c_flush_queue();
i2c_del_driver(&atmel_ecc_driver);
}


@ -263,6 +263,8 @@ static void atmel_i2c_work_handler(struct work_struct *work)
work_data->cbk(work_data, work_data->areq, status);
}
static struct workqueue_struct *atmel_wq;
void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
void (*cbk)(struct atmel_i2c_work_data *work_data,
void *areq, int status),
@ -272,10 +274,16 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
work_data->areq = areq;
INIT_WORK(&work_data->work, atmel_i2c_work_handler);
schedule_work(&work_data->work);
queue_work(atmel_wq, &work_data->work);
}
EXPORT_SYMBOL(atmel_i2c_enqueue);
void atmel_i2c_flush_queue(void)
{
flush_workqueue(atmel_wq);
}
EXPORT_SYMBOL(atmel_i2c_flush_queue);
static inline size_t atmel_i2c_wake_token_sz(u32 bus_clk_rate)
{
u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
@ -364,14 +372,24 @@ int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
i2c_set_clientdata(client, i2c_priv);
ret = device_sanity_check(client);
if (ret)
return ret;
return 0;
return device_sanity_check(client);
}
EXPORT_SYMBOL(atmel_i2c_probe);
static int __init atmel_i2c_init(void)
{
atmel_wq = alloc_workqueue("atmel_wq", 0, 0);
return atmel_wq ? 0 : -ENOMEM;
}
static void __exit atmel_i2c_exit(void)
{
destroy_workqueue(atmel_wq);
}
module_init(atmel_i2c_init);
module_exit(atmel_i2c_exit);
MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
MODULE_LICENSE("GPL v2");


@ -173,6 +173,7 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
void (*cbk)(struct atmel_i2c_work_data *work_data,
void *areq, int status),
void *areq);
void atmel_i2c_flush_queue(void);
int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd);


@ -121,23 +121,24 @@ static int atmel_sha204a_remove(struct i2c_client *client)
struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
if (atomic_read(&i2c_priv->tfm_count)) {
dev_err(&client->dev, "Device is busy\n");
return -EBUSY;
dev_emerg(&client->dev, "Device is busy, will remove it anyhow\n");
return 0;
}
if (i2c_priv->hwrng.priv)
kfree((void *)i2c_priv->hwrng.priv);
kfree((void *)i2c_priv->hwrng.priv);
return 0;
}
static const struct of_device_id atmel_sha204a_dt_ids[] = {
{ .compatible = "atmel,atsha204", },
{ .compatible = "atmel,atsha204a", },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, atmel_sha204a_dt_ids);
static const struct i2c_device_id atmel_sha204a_id[] = {
{ "atsha204", 0 },
{ "atsha204a", 0 },
{ /* sentinel */ }
};
@ -159,7 +160,7 @@ static int __init atmel_sha204a_init(void)
static void __exit atmel_sha204a_exit(void)
{
flush_scheduled_work();
atmel_i2c_flush_queue();
i2c_del_driver(&atmel_sha204a_driver);
}


@ -151,6 +151,14 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
Selecting this will register the SEC4 hardware rng to
the hw_random API for supplying the kernel entropy pool.
config CRYPTO_DEV_FSL_CAAM_PRNG_API
bool "Register Pseudo random number generation implementation with Crypto API"
default y
select CRYPTO_RNG
help
Selecting this will register the SEC hardware prng to
the Crypto API.
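
The PRNG enabled by this option registers under the generic "stdrng" algorithm name (see the caamprng.c hunk below), so in-kernel users reach it through the standard crypto RNG API rather than a driver-specific interface. A minimal sketch of such a consumer (illustrative only, not part of this series):

#include <linux/err.h>
#include <crypto/rng.h>

/* Sketch of an in-kernel consumer of the registered "stdrng" algorithm. */
static int fill_with_hw_prng(u8 *buf, unsigned int len)
{
	struct crypto_rng *rng;
	int ret;

	/* picks the highest-priority "stdrng" provider, e.g. prng-caam (500) */
	rng = crypto_alloc_rng("stdrng", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	/* prng-caam takes no seed (seedsize = 0), so slen is 0 here */
	ret = crypto_rng_reset(rng, NULL, 0);
	if (!ret)
		ret = crypto_rng_get_bytes(rng, buf, len);

	crypto_free_rng(rng);
	return ret;
}
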
config CRYPTO_DEV_FSL_CAAM_BLOB_GEN
bool


@ -20,6 +20,7 @@ caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API) += caamprng.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_BLOB_GEN) += blob_gen.o


@ -0,0 +1,235 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Driver to expose SEC4 PRNG via crypto RNG API
*
* Copyright 2022 NXP
*
*/
#include <linux/completion.h>
#include <crypto/internal/rng.h>
#include "compat.h"
#include "regs.h"
#include "intern.h"
#include "desc_constr.h"
#include "jr.h"
#include "error.h"
/*
* Length of used descriptors, see caam_init_desc()
*/
#define CAAM_PRNG_MAX_DESC_LEN (CAAM_CMD_SZ + \
CAAM_CMD_SZ + \
CAAM_CMD_SZ + CAAM_PTR_SZ_MAX)
/* prng per-device context */
struct caam_prng_ctx {
int err;
struct completion done;
};
struct caam_prng_alg {
struct rng_alg rng;
bool registered;
};
static void caam_prng_done(struct device *jrdev, u32 *desc, u32 err,
void *context)
{
struct caam_prng_ctx *jctx = context;
jctx->err = err ? caam_jr_strstatus(jrdev, err) : 0;
complete(&jctx->done);
}
static u32 *caam_init_reseed_desc(u32 *desc)
{
init_job_desc(desc, 0); /* + 1 cmd_sz */
/* Generate random bytes: + 1 cmd_sz */
append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
OP_ALG_AS_FINALIZE);
print_hex_dump_debug("prng reseed desc@: ", DUMP_PREFIX_ADDRESS,
16, 4, desc, desc_bytes(desc), 1);
return desc;
}
static u32 *caam_init_prng_desc(u32 *desc, dma_addr_t dst_dma, u32 len)
{
init_job_desc(desc, 0); /* + 1 cmd_sz */
/* Generate random bytes: + 1 cmd_sz */
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
/* Store bytes: + 1 cmd_sz + caam_ptr_sz */
append_fifo_store(desc, dst_dma,
len, FIFOST_TYPE_RNGSTORE);
print_hex_dump_debug("prng job desc@: ", DUMP_PREFIX_ADDRESS,
16, 4, desc, desc_bytes(desc), 1);
return desc;
}
static int caam_prng_generate(struct crypto_rng *tfm,
const u8 *src, unsigned int slen,
u8 *dst, unsigned int dlen)
{
struct caam_prng_ctx ctx;
struct device *jrdev;
dma_addr_t dst_dma;
u32 *desc;
u8 *buf;
int ret;
buf = kzalloc(dlen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
jrdev = caam_jr_alloc();
ret = PTR_ERR_OR_ZERO(jrdev);
if (ret) {
pr_err("Job Ring Device allocation failed\n");
kfree(buf);
return ret;
}
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
if (!desc) {
ret = -ENOMEM;
goto out1;
}
dst_dma = dma_map_single(jrdev, buf, dlen, DMA_FROM_DEVICE);
if (dma_mapping_error(jrdev, dst_dma)) {
dev_err(jrdev, "Failed to map destination buffer memory\n");
ret = -ENOMEM;
goto out;
}
init_completion(&ctx.done);
ret = caam_jr_enqueue(jrdev,
caam_init_prng_desc(desc, dst_dma, dlen),
caam_prng_done, &ctx);
if (ret == -EINPROGRESS) {
wait_for_completion(&ctx.done);
ret = ctx.err;
}
dma_unmap_single(jrdev, dst_dma, dlen, DMA_FROM_DEVICE);
if (!ret)
memcpy(dst, buf, dlen);
out:
kfree(desc);
out1:
caam_jr_free(jrdev);
kfree(buf);
return ret;
}
static void caam_prng_exit(struct crypto_tfm *tfm) {}
static int caam_prng_init(struct crypto_tfm *tfm)
{
return 0;
}
static int caam_prng_seed(struct crypto_rng *tfm,
const u8 *seed, unsigned int slen)
{
struct caam_prng_ctx ctx;
struct device *jrdev;
u32 *desc;
int ret;
if (slen) {
pr_err("Seed length should be zero\n");
return -EINVAL;
}
jrdev = caam_jr_alloc();
ret = PTR_ERR_OR_ZERO(jrdev);
if (ret) {
pr_err("Job Ring Device allocation failed\n");
return ret;
}
desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
if (!desc) {
caam_jr_free(jrdev);
return -ENOMEM;
}
init_completion(&ctx.done);
ret = caam_jr_enqueue(jrdev,
caam_init_reseed_desc(desc),
caam_prng_done, &ctx);
if (ret == -EINPROGRESS) {
wait_for_completion(&ctx.done);
ret = ctx.err;
}
kfree(desc);
caam_jr_free(jrdev);
return ret;
}
static struct caam_prng_alg caam_prng_alg = {
.rng = {
.generate = caam_prng_generate,
.seed = caam_prng_seed,
.seedsize = 0,
.base = {
.cra_name = "stdrng",
.cra_driver_name = "prng-caam",
.cra_priority = 500,
.cra_ctxsize = sizeof(struct caam_prng_ctx),
.cra_module = THIS_MODULE,
.cra_init = caam_prng_init,
.cra_exit = caam_prng_exit,
},
}
};
void caam_prng_unregister(void *data)
{
if (caam_prng_alg.registered)
crypto_unregister_rng(&caam_prng_alg.rng);
}
int caam_prng_register(struct device *ctrldev)
{
struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
u32 rng_inst;
int ret = 0;
/* Check for available RNG blocks before registration */
if (priv->era < 10)
rng_inst = (rd_reg32(&priv->jr[0]->perfmon.cha_num_ls) &
CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT;
else
rng_inst = rd_reg32(&priv->jr[0]->vreg.rng) & CHA_VER_NUM_MASK;
if (!rng_inst) {
dev_dbg(ctrldev, "RNG block is not available... skipping registering algorithm\n");
return ret;
}
ret = crypto_register_rng(&caam_prng_alg.rng);
if (ret) {
dev_err(ctrldev,
"couldn't register rng crypto alg: %d\n",
ret);
return ret;
}
caam_prng_alg.registered = true;
dev_info(ctrldev,
"rng crypto API alg registered %s\n", caam_prng_alg.rng.base.cra_driver_name);
return 0;
}


@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major,
}
#endif
static bool needs_entropy_delay_adjustment(void)
{
if (of_machine_is_compatible("fsl,imx6sx"))
return true;
return false;
}
/* Probe routine for CAAM top (controller) level */
static int caam_probe(struct platform_device *pdev)
{
@ -868,6 +875,8 @@ static int caam_probe(struct platform_device *pdev)
* Also, if a handle was instantiated, do not change
* the TRNG parameters.
*/
if (needs_entropy_delay_adjustment())
ent_delay = 12000;
if (!(ctrlpriv->rng4_sh_init || inst_handles)) {
dev_info(dev,
"Entropy delay = %u\n",
@ -884,6 +893,15 @@ static int caam_probe(struct platform_device *pdev)
*/
ret = instantiate_rng(dev, inst_handles,
gen_sk);
/*
* Entropy delay is determined via TRNG characterization.
* TRNG characterization is run across different voltages
* and temperatures.
* If worst case value for ent_dly is identified,
* the loop can be skipped for that platform.
*/
if (needs_entropy_delay_adjustment())
break;
if (ret == -EAGAIN)
/*
* if here, the loop will rerun,


@ -186,6 +186,21 @@ static inline void caam_rng_exit(struct device *dev) {}
#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */
#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API
int caam_prng_register(struct device *dev);
void caam_prng_unregister(void *data);
#else
static inline int caam_prng_register(struct device *dev)
{
return 0;
}
static inline void caam_prng_unregister(void *data) {}
#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API */
#ifdef CONFIG_CAAM_QI
int caam_qi_algapi_init(struct device *dev);


@ -39,6 +39,7 @@ static void register_algs(struct caam_drv_private_jr *jrpriv,
caam_algapi_hash_init(dev);
caam_pkc_init(dev);
jrpriv->hwrng = !caam_rng_init(dev);
caam_prng_register(dev);
caam_qi_algapi_init(dev);
algs_unlock:
@ -53,7 +54,7 @@ static void unregister_algs(void)
goto algs_unlock;
caam_qi_algapi_exit();
caam_prng_unregister(NULL);
caam_pkc_exit();
caam_algapi_hash_exit();
caam_algapi_exit();


@ -269,15 +269,17 @@ static void nitrox_remove_from_devlist(struct nitrox_device *ndev)
struct nitrox_device *nitrox_get_first_device(void)
{
struct nitrox_device *ndev;
struct nitrox_device *ndev = NULL, *iter;
mutex_lock(&devlist_lock);
list_for_each_entry(ndev, &ndevlist, list) {
if (nitrox_ready(ndev))
list_for_each_entry(iter, &ndevlist, list) {
if (nitrox_ready(iter)) {
ndev = iter;
break;
}
}
mutex_unlock(&devlist_lock);
if (&ndev->list == &ndevlist)
if (!ndev)
return NULL;
refcount_inc(&ndev->refcnt);


@ -70,17 +70,23 @@ static unsigned int psp_get_capability(struct psp_device *psp)
*/
if (val == 0xffffffff) {
dev_notice(psp->dev, "psp: unable to access the device: you might be running a broken BIOS.\n");
return 0;
return -ENODEV;
}
psp->capability = val;
return val;
/* Detect if TSME and SME are both enabled */
if (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING &&
psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) &&
cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
return 0;
}
static int psp_check_sev_support(struct psp_device *psp,
unsigned int capability)
static int psp_check_sev_support(struct psp_device *psp)
{
/* Check if device supports SEV feature */
if (!(capability & 1)) {
if (!(psp->capability & PSP_CAPABILITY_SEV)) {
dev_dbg(psp->dev, "psp does not support SEV\n");
return -ENODEV;
}
@ -88,11 +94,10 @@ static int psp_check_sev_support(struct psp_device *psp,
return 0;
}
static int psp_check_tee_support(struct psp_device *psp,
unsigned int capability)
static int psp_check_tee_support(struct psp_device *psp)
{
/* Check if device supports TEE feature */
if (!(capability & 2)) {
if (!(psp->capability & PSP_CAPABILITY_TEE)) {
dev_dbg(psp->dev, "psp does not support TEE\n");
return -ENODEV;
}
@ -100,30 +105,17 @@ static int psp_check_tee_support(struct psp_device *psp,
return 0;
}
static int psp_check_support(struct psp_device *psp,
unsigned int capability)
{
int sev_support = psp_check_sev_support(psp, capability);
int tee_support = psp_check_tee_support(psp, capability);
/* Return error if device neither supports SEV nor TEE */
if (sev_support && tee_support)
return -ENODEV;
return 0;
}
static int psp_init(struct psp_device *psp, unsigned int capability)
static int psp_init(struct psp_device *psp)
{
int ret;
if (!psp_check_sev_support(psp, capability)) {
if (!psp_check_sev_support(psp)) {
ret = sev_dev_init(psp);
if (ret)
return ret;
}
if (!psp_check_tee_support(psp, capability)) {
if (!psp_check_tee_support(psp)) {
ret = tee_dev_init(psp);
if (ret)
return ret;
@ -136,7 +128,6 @@ int psp_dev_init(struct sp_device *sp)
{
struct device *dev = sp->dev;
struct psp_device *psp;
unsigned int capability;
int ret;
ret = -ENOMEM;
@ -155,11 +146,7 @@ int psp_dev_init(struct sp_device *sp)
psp->io_regs = sp->io_map;
capability = psp_get_capability(psp);
if (!capability)
goto e_disable;
ret = psp_check_support(psp, capability);
ret = psp_get_capability(psp);
if (ret)
goto e_disable;
@ -174,7 +161,7 @@ int psp_dev_init(struct sp_device *sp)
goto e_err;
}
ret = psp_init(psp, capability);
ret = psp_init(psp);
if (ret)
goto e_irq;


@ -45,6 +45,8 @@ struct psp_device {
void *sev_data;
void *tee_data;
unsigned int capability;
};
void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
@ -57,4 +59,24 @@ void psp_clear_tee_irq_handler(struct psp_device *psp);
struct psp_device *psp_get_master_device(void);
#define PSP_CAPABILITY_SEV BIT(0)
#define PSP_CAPABILITY_TEE BIT(1)
#define PSP_CAPABILITY_PSP_SECURITY_REPORTING BIT(7)
#define PSP_CAPABILITY_PSP_SECURITY_OFFSET 8
/*
* The PSP doesn't directly store these bits in the capability register
* but instead copies them from the results of query command.
*
* The offsets from the query command are below, and shifted when used.
*/
#define PSP_SECURITY_FUSED_PART BIT(0)
#define PSP_SECURITY_DEBUG_LOCK_ON BIT(2)
#define PSP_SECURITY_TSME_STATUS BIT(5)
#define PSP_SECURITY_ANTI_ROLLBACK_STATUS BIT(7)
#define PSP_SECURITY_RPMC_PRODUCTION_ENABLED BIT(8)
#define PSP_SECURITY_RPMC_SPIROM_AVAILABLE BIT(9)
#define PSP_SECURITY_HSP_TPM_AVAILABLE BIT(10)
#define PSP_SECURITY_ROM_ARMOR_ENFORCED BIT(11)
#endif /* __PSP_DEV_H */


@ -23,6 +23,7 @@
#include <linux/gfp.h>
#include <linux/cpufeature.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <asm/smp.h>
@ -170,6 +171,31 @@ static void *sev_fw_alloc(unsigned long len)
return page_address(page);
}
static struct file *open_file_as_root(const char *filename, int flags, umode_t mode)
{
struct file *fp;
struct path root;
struct cred *cred;
const struct cred *old_cred;
task_lock(&init_task);
get_fs_root(init_task.fs, &root);
task_unlock(&init_task);
cred = prepare_creds();
if (!cred)
return ERR_PTR(-ENOMEM);
cred->fsuid = GLOBAL_ROOT_UID;
old_cred = override_creds(cred);
fp = file_open_root(&root, filename, flags, mode);
path_put(&root);
revert_creds(old_cred);
return fp;
}
static int sev_read_init_ex_file(void)
{
struct sev_device *sev = psp_master->sev_data;
@ -181,7 +207,7 @@ static int sev_read_init_ex_file(void)
if (!sev_init_ex_buffer)
return -EOPNOTSUPP;
fp = filp_open(init_ex_path, O_RDONLY, 0);
fp = open_file_as_root(init_ex_path, O_RDONLY, 0);
if (IS_ERR(fp)) {
int ret = PTR_ERR(fp);
@ -217,7 +243,7 @@ static void sev_write_init_ex_file(void)
if (!sev_init_ex_buffer)
return;
fp = filp_open(init_ex_path, O_CREAT | O_WRONLY, 0600);
fp = open_file_as_root(init_ex_path, O_CREAT | O_WRONLY, 0600);
if (IS_ERR(fp)) {
dev_err(sev->dev,
"SEV: could not open file for write, error %ld\n",
@ -435,7 +461,7 @@ static int __sev_platform_init_locked(int *error)
* initialization function should succeed by replacing the state
* with a reset state.
*/
dev_dbg(sev->dev, "SEV: retrying INIT command");
dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state.");
rc = init_function(&psp_ret);
}
if (error)


@ -32,6 +32,67 @@ struct sp_pci {
};
static struct sp_device *sp_dev_master;
#define attribute_show(name, def) \
static ssize_t name##_show(struct device *d, struct device_attribute *attr, \
char *buf) \
{ \
struct sp_device *sp = dev_get_drvdata(d); \
struct psp_device *psp = sp->psp_data; \
int bit = PSP_SECURITY_##def << PSP_CAPABILITY_PSP_SECURITY_OFFSET; \
return sysfs_emit(buf, "%d\n", (psp->capability & bit) > 0); \
}
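
For readability, here is the hand expansion of one instance of the macro above, attribute_show(fused_part, FUSED_PART), shown purely for illustration:

static ssize_t fused_part_show(struct device *d, struct device_attribute *attr,
			       char *buf)
{
	struct sp_device *sp = dev_get_drvdata(d);
	struct psp_device *psp = sp->psp_data;
	int bit = PSP_SECURITY_FUSED_PART << PSP_CAPABILITY_PSP_SECURITY_OFFSET;

	return sysfs_emit(buf, "%d\n", (psp->capability & bit) > 0);
}
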
attribute_show(fused_part, FUSED_PART)
static DEVICE_ATTR_RO(fused_part);
attribute_show(debug_lock_on, DEBUG_LOCK_ON)
static DEVICE_ATTR_RO(debug_lock_on);
attribute_show(tsme_status, TSME_STATUS)
static DEVICE_ATTR_RO(tsme_status);
attribute_show(anti_rollback_status, ANTI_ROLLBACK_STATUS)
static DEVICE_ATTR_RO(anti_rollback_status);
attribute_show(rpmc_production_enabled, RPMC_PRODUCTION_ENABLED)
static DEVICE_ATTR_RO(rpmc_production_enabled);
attribute_show(rpmc_spirom_available, RPMC_SPIROM_AVAILABLE)
static DEVICE_ATTR_RO(rpmc_spirom_available);
attribute_show(hsp_tpm_available, HSP_TPM_AVAILABLE)
static DEVICE_ATTR_RO(hsp_tpm_available);
attribute_show(rom_armor_enforced, ROM_ARMOR_ENFORCED)
static DEVICE_ATTR_RO(rom_armor_enforced);
static struct attribute *psp_attrs[] = {
&dev_attr_fused_part.attr,
&dev_attr_debug_lock_on.attr,
&dev_attr_tsme_status.attr,
&dev_attr_anti_rollback_status.attr,
&dev_attr_rpmc_production_enabled.attr,
&dev_attr_rpmc_spirom_available.attr,
&dev_attr_hsp_tpm_available.attr,
&dev_attr_rom_armor_enforced.attr,
NULL
};
static umode_t psp_security_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
{
struct device *dev = kobj_to_dev(kobj);
struct sp_device *sp = dev_get_drvdata(dev);
struct psp_device *psp = sp->psp_data;
if (psp && (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING))
return 0444;
return 0;
}
static struct attribute_group psp_attr_group = {
.attrs = psp_attrs,
.is_visible = psp_security_is_visible,
};
static const struct attribute_group *psp_groups[] = {
&psp_attr_group,
NULL,
};
static int sp_get_msix_irqs(struct sp_device *sp)
{
struct sp_pci *sp_pci = sp->dev_specific;
@ -391,6 +452,7 @@ static struct pci_driver sp_pci_driver = {
.remove = sp_pci_remove,
.shutdown = sp_pci_shutdown,
.driver.pm = &sp_pci_pm_ops,
.dev_groups = psp_groups,
};
int sp_pci_init(void)


@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx,
req_ctx->mlli_params.mlli_dma_addr);
}
dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
if (src != dst) {
dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL);
dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE);
dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE);
dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst));
dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
} else {
dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
}
}
@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
u32 dummy = 0;
int rc = 0;
u32 mapped_nents = 0;
int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
req_ctx->dma_buf_type = CC_DMA_BUF_DLLI;
mlli_params->curr_pool = NULL;
@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
}
/* Map the src SGL */
rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents,
rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents,
LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents);
if (rc)
goto cipher_exit;
@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
}
} else {
/* Map the dst sg */
rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL,
rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE,
&req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES,
&dummy, &mapped_nents);
if (rc)
@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
unsigned int hw_iv_size = areq_ctx->hw_iv_size;
struct cc_drvdata *drvdata = dev_get_drvdata(dev);
int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
if (areq_ctx->mac_buf_dma_addr) {
dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr,
@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents,
areq_ctx->assoclen, req->cryptlen);
dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents,
DMA_BIDIRECTIONAL);
dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction);
if (req->src != req->dst) {
dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n",
sg_virt(req->dst));
dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents,
DMA_BIDIRECTIONAL);
dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE);
}
if (drvdata->coherent &&
areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT &&
@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
else
size_for_map -= authsize;
rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL,
rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE,
&areq_ctx->dst.mapped_nents,
LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes,
&dst_mapped_nents);
@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
size_to_map += authsize;
}
rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,
rc = cc_map_sg(dev, req->src, size_to_map,
(req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL),
&areq_ctx->src.mapped_nents,
(LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES +
LLI_MAX_NUM_OF_DATA_ENTRIES),


@ -529,24 +529,26 @@ static int init_cc_resources(struct platform_device *plat_dev)
goto post_req_mgr_err;
}
/* hash must be allocated first due to use of send_request_init()
* and dependency of AEAD on it
*/
rc = cc_hash_alloc(new_drvdata);
if (rc) {
dev_err(dev, "cc_hash_alloc failed\n");
goto post_buf_mgr_err;
}
/* Allocate crypto algs */
rc = cc_cipher_alloc(new_drvdata);
if (rc) {
dev_err(dev, "cc_cipher_alloc failed\n");
goto post_buf_mgr_err;
}
/* hash must be allocated before aead since hash exports APIs */
rc = cc_hash_alloc(new_drvdata);
if (rc) {
dev_err(dev, "cc_hash_alloc failed\n");
goto post_cipher_err;
goto post_hash_err;
}
rc = cc_aead_alloc(new_drvdata);
if (rc) {
dev_err(dev, "cc_aead_alloc failed\n");
goto post_hash_err;
goto post_cipher_err;
}
/* If we got here and FIPS mode is enabled
@ -558,10 +560,10 @@ static int init_cc_resources(struct platform_device *plat_dev)
pm_runtime_put(dev);
return 0;
post_hash_err:
cc_hash_free(new_drvdata);
post_cipher_err:
cc_cipher_free(new_drvdata);
post_hash_err:
cc_hash_free(new_drvdata);
post_buf_mgr_err:
cc_buffer_mgr_fini(new_drvdata);
post_req_mgr_err:
@ -593,8 +595,8 @@ static void cleanup_cc_resources(struct platform_device *plat_dev)
(struct cc_drvdata *)platform_get_drvdata(plat_dev);
cc_aead_free(drvdata);
cc_hash_free(drvdata);
cc_cipher_free(drvdata);
cc_hash_free(drvdata);
cc_buffer_mgr_fini(drvdata);
cc_req_mgr_fini(drvdata);
cc_fips_fini(drvdata);


@ -26,6 +26,7 @@ config CRYPTO_DEV_HISI_SEC2
select CRYPTO_SHA1
select CRYPTO_SHA256
select CRYPTO_SHA512
select CRYPTO_SM4
depends on PCI && PCI_MSI
depends on UACCE || UACCE=n
depends on ARM64 || (COMPILE_TEST && 64BIT)


@ -36,6 +36,12 @@
#define HPRE_DATA_WUSER_CFG 0x301040
#define HPRE_INT_MASK 0x301400
#define HPRE_INT_STATUS 0x301800
#define HPRE_HAC_INT_MSK 0x301400
#define HPRE_HAC_RAS_CE_ENB 0x301410
#define HPRE_HAC_RAS_NFE_ENB 0x301414
#define HPRE_HAC_RAS_FE_ENB 0x301418
#define HPRE_HAC_INT_SET 0x301500
#define HPRE_RNG_TIMEOUT_NUM 0x301A34
#define HPRE_CORE_INT_ENABLE 0
#define HPRE_CORE_INT_DISABLE GENMASK(21, 0)
#define HPRE_RDCHN_INI_ST 0x301a00
@ -107,6 +113,15 @@
#define HPRE_SQE_MASK_OFFSET 8
#define HPRE_SQE_MASK_LEN 24
#define HPRE_DFX_BASE 0x301000
#define HPRE_DFX_COMMON1 0x301400
#define HPRE_DFX_COMMON2 0x301A00
#define HPRE_DFX_CORE 0x302000
#define HPRE_DFX_BASE_LEN 0x55
#define HPRE_DFX_COMMON1_LEN 0x41
#define HPRE_DFX_COMMON2_LEN 0xE
#define HPRE_DFX_CORE_LEN 0x43
static const char hpre_name[] = "hisi_hpre";
static struct dentry *hpre_debugfs_root;
static const struct pci_device_id hpre_dev_ids[] = {
@ -192,28 +207,32 @@ static const u64 hpre_cluster_offsets[] = {
};
static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
{"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET},
{"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET},
{"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET},
{"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET},
{"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET},
{"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET},
{"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET},
{"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET},
{"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET},
{"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET},
};
static const struct debugfs_reg32 hpre_com_dfx_regs[] = {
{"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE},
{"AXQOS ", HPRE_VFG_AXQOS},
{"AWUSR_CFG ", HPRE_AWUSR_FP_CFG},
{"QM_ARUSR_MCFG1 ", QM_ARUSER_M_CFG_1},
{"QM_AWUSR_MCFG1 ", QM_AWUSER_M_CFG_1},
{"BD_ENDIAN ", HPRE_BD_ENDIAN},
{"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS},
{"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG},
{"POISON_BYPASS ", HPRE_POISON_BYPASS},
{"BD_ARUSER ", HPRE_BD_ARUSR_CFG},
{"BD_AWUSER ", HPRE_BD_AWUSR_CFG},
{"DATA_ARUSER ", HPRE_DATA_RUSER_CFG},
{"DATA_AWUSER ", HPRE_DATA_WUSER_CFG},
{"INT_STATUS ", HPRE_INT_STATUS},
{"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE},
{"AXQOS ", HPRE_VFG_AXQOS},
{"AWUSR_CFG ", HPRE_AWUSR_FP_CFG},
{"BD_ENDIAN ", HPRE_BD_ENDIAN},
{"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS},
{"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG},
{"POISON_BYPASS ", HPRE_POISON_BYPASS},
{"BD_ARUSER ", HPRE_BD_ARUSR_CFG},
{"BD_AWUSER ", HPRE_BD_AWUSR_CFG},
{"DATA_ARUSER ", HPRE_DATA_RUSER_CFG},
{"DATA_AWUSER ", HPRE_DATA_WUSER_CFG},
{"INT_STATUS ", HPRE_INT_STATUS},
{"INT_MASK ", HPRE_HAC_INT_MSK},
{"RAS_CE_ENB ", HPRE_HAC_RAS_CE_ENB},
{"RAS_NFE_ENB ", HPRE_HAC_RAS_NFE_ENB},
{"RAS_FE_ENB ", HPRE_HAC_RAS_FE_ENB},
{"INT_SET ", HPRE_HAC_INT_SET},
{"RNG_TIMEOUT_NUM ", HPRE_RNG_TIMEOUT_NUM},
};
static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
@ -226,6 +245,53 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
"invalid_req_cnt"
};
/* define the HPRE's dfx regs region and region length */
static struct dfx_diff_registers hpre_diff_regs[] = {
{
.reg_offset = HPRE_DFX_BASE,
.reg_len = HPRE_DFX_BASE_LEN,
}, {
.reg_offset = HPRE_DFX_COMMON1,
.reg_len = HPRE_DFX_COMMON1_LEN,
}, {
.reg_offset = HPRE_DFX_COMMON2,
.reg_len = HPRE_DFX_COMMON2_LEN,
}, {
.reg_offset = HPRE_DFX_CORE,
.reg_len = HPRE_DFX_CORE_LEN,
},
};
static int hpre_diff_regs_show(struct seq_file *s, void *unused)
{
struct hisi_qm *qm = s->private;
hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
ARRAY_SIZE(hpre_diff_regs));
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_diff_regs);
static int hpre_com_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
static const struct kernel_param_ops hpre_uacce_mode_ops = {
.set = uacce_mode_set,
.get = param_get_int,
@ -779,24 +845,6 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
hpre_debugfs_atomic64_set, "%llu\n");
static int hpre_com_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
{
hisi_qm_regs_dump(s, s->private);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@ -895,6 +943,7 @@ static int hpre_ctrl_debug_init(struct hisi_qm *qm)
static void hpre_dfx_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *hpre_regs = qm->debug.acc_diff_regs;
struct hpre *hpre = container_of(qm, struct hpre, qm);
struct hpre_dfx *dfx = hpre->debug.dfx;
struct dentry *parent;
@ -906,6 +955,10 @@ static void hpre_dfx_debug_init(struct hisi_qm *qm)
debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i],
&hpre_atomic64_ops);
}
if (qm->fun_type == QM_HW_PF && hpre_regs)
debugfs_create_file("diff_regs", 0444, parent,
qm, &hpre_diff_regs_fops);
}
static int hpre_debugfs_init(struct hisi_qm *qm)
@ -918,6 +971,13 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs,
ARRAY_SIZE(hpre_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init HPRE diff regs!\n");
goto debugfs_remove;
}
hisi_qm_debug_init(qm);
if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
@ -931,12 +991,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
debugfs_remove:
debugfs_remove_recursive(qm->debug.debug_root);
return ret;
}
static void hpre_debugfs_exit(struct hisi_qm *qm)
{
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
}
@ -969,6 +1033,82 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
return hisi_qm_init(qm);
}
static int hpre_show_last_regs_init(struct hisi_qm *qm)
{
int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs);
int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
u8 clusters_num = hpre_cluster_num(qm);
struct qm_debug *debug = &qm->debug;
void __iomem *io_base;
int i, j, idx;
debug->last_words = kcalloc(cluster_dfx_regs_num * clusters_num +
com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
if (!debug->last_words)
return -ENOMEM;
for (i = 0; i < com_dfx_regs_num; i++)
debug->last_words[i] = readl_relaxed(qm->io_base +
hpre_com_dfx_regs[i].offset);
for (i = 0; i < clusters_num; i++) {
io_base = qm->io_base + hpre_cluster_offsets[i];
for (j = 0; j < cluster_dfx_regs_num; j++) {
idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
debug->last_words[idx] = readl_relaxed(
io_base + hpre_cluster_dfx_regs[j].offset);
}
}
return 0;
}
static void hpre_show_last_regs_uninit(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
kfree(debug->last_words);
debug->last_words = NULL;
}
static void hpre_show_last_dfx_regs(struct hisi_qm *qm)
{
int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs);
int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
u8 clusters_num = hpre_cluster_num(qm);
struct qm_debug *debug = &qm->debug;
struct pci_dev *pdev = qm->pdev;
void __iomem *io_base;
int i, j, idx;
u32 val;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
/* dumps last word of the debugging registers during controller reset */
for (i = 0; i < com_dfx_regs_num; i++) {
val = readl_relaxed(qm->io_base + hpre_com_dfx_regs[i].offset);
if (debug->last_words[i] != val)
pci_info(pdev, "Common_core:%s \t= 0x%08x => 0x%08x\n",
hpre_com_dfx_regs[i].name, debug->last_words[i], val);
}
for (i = 0; i < clusters_num; i++) {
io_base = qm->io_base + hpre_cluster_offsets[i];
for (j = 0; j < cluster_dfx_regs_num; j++) {
val = readl_relaxed(io_base +
hpre_cluster_dfx_regs[j].offset);
idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
if (debug->last_words[idx] != val)
pci_info(pdev, "cluster-%d:%s \t= 0x%08x => 0x%08x\n",
i, hpre_cluster_dfx_regs[j].name, debug->last_words[idx], val);
}
}
}
static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct hpre_hw_error *err = hpre_hw_errors;
@ -1027,6 +1167,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
.open_axi_master_ooo = hpre_open_axi_master_ooo,
.open_sva_prefetch = hpre_open_sva_prefetch,
.close_sva_prefetch = hpre_close_sva_prefetch,
.show_last_dfx_regs = hpre_show_last_dfx_regs,
.err_info_init = hpre_err_info_init,
};
@ -1044,8 +1185,11 @@ static int hpre_pf_probe_init(struct hpre *hpre)
qm->err_ini = &hpre_err_ini;
qm->err_ini->err_info_init(qm);
hisi_qm_dev_err_init(qm);
ret = hpre_show_last_regs_init(qm);
if (ret)
pci_err(qm->pdev, "Failed to init last word regs!\n");
return 0;
return ret;
}
static int hpre_probe_init(struct hpre *hpre)
@ -1131,6 +1275,7 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hisi_qm_stop(qm, QM_NORMAL);
err_with_err_init:
hpre_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
err_with_qm_init:
@ -1161,6 +1306,7 @@ static void hpre_remove(struct pci_dev *pdev)
if (qm->fun_type == QM_HW_PF) {
hpre_cnt_regs_clear(qm);
qm->debug.curr_qm_qp_num = 0;
hpre_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
}


@ -253,7 +253,15 @@
#define QM_QOS_MAX_CIR_U 6
#define QM_QOS_MAX_CIR_S 11
#define QM_QOS_VAL_MAX_LEN 32
#define QM_DFX_BASE 0x0100000
#define QM_DFX_STATE1 0x0104000
#define QM_DFX_STATE2 0x01040C8
#define QM_DFX_COMMON 0x0000
#define QM_DFX_BASE_LEN 0x5A
#define QM_DFX_STATE1_LEN 0x2E
#define QM_DFX_STATE2_LEN 0x11
#define QM_DFX_COMMON_LEN 0xC3
#define QM_DFX_REGS_LEN 4UL
#define QM_AUTOSUSPEND_DELAY 3000
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
@ -467,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = {
{ /* sentinel */ }
};
/* define the QM's dfx regs region and region length */
static struct dfx_diff_registers qm_diff_regs[] = {
{
.reg_offset = QM_DFX_BASE,
.reg_len = QM_DFX_BASE_LEN,
}, {
.reg_offset = QM_DFX_STATE1,
.reg_len = QM_DFX_STATE1_LEN,
}, {
.reg_offset = QM_DFX_STATE2,
.reg_len = QM_DFX_STATE2_LEN,
}, {
.reg_offset = QM_DFX_COMMON,
.reg_len = QM_DFX_COMMON_LEN,
},
};
static const char * const qm_db_timeout[] = {
"sq", "cq", "eq", "aeq",
};
@ -687,13 +712,13 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
if (!IS_ENABLED(CONFIG_ARM64)) {
memcpy_toio(fun_base, src, 16);
wmb();
dma_wmb();
return;
}
asm volatile("ldp %0, %1, %3\n"
"stp %0, %1, %2\n"
"dsb sy\n"
"dmb oshst\n"
: "=&r" (tmp0),
"=&r" (tmp1),
"+Q" (*((char __iomem *)fun_base))
@ -982,7 +1007,7 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
*addr = 1;
/* make sure setup is completed */
mb();
smp_wmb();
}
static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id)
@ -1625,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused)
DEFINE_SHOW_ATTRIBUTE(qm_regs);
static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
const struct dfx_diff_registers *cregs, int reg_len)
{
struct dfx_diff_registers *diff_regs;
u32 j, base_offset;
int i;
diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
if (!diff_regs)
return ERR_PTR(-ENOMEM);
for (i = 0; i < reg_len; i++) {
if (!cregs[i].reg_len)
continue;
diff_regs[i].reg_offset = cregs[i].reg_offset;
diff_regs[i].reg_len = cregs[i].reg_len;
diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
GFP_KERNEL);
if (!diff_regs[i].regs)
goto alloc_error;
for (j = 0; j < diff_regs[i].reg_len; j++) {
base_offset = diff_regs[i].reg_offset +
j * QM_DFX_REGS_LEN;
diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
}
}
return diff_regs;
alloc_error:
while (i > 0) {
i--;
kfree(diff_regs[i].regs);
}
kfree(diff_regs);
return ERR_PTR(-ENOMEM);
}
static void dfx_regs_uninit(struct hisi_qm *qm,
struct dfx_diff_registers *dregs, int reg_len)
{
int i;
/* Set the pointer to NULL to prevent double free */
for (i = 0; i < reg_len; i++) {
kfree(dregs[i].regs);
dregs[i].regs = NULL;
}
kfree(dregs);
dregs = NULL;
}
/**
* hisi_qm_diff_regs_init() - Allocate memory for registers.
* @qm: device qm handle.
* @dregs: diff registers handle.
* @reg_len: diff registers region length.
*/
int hisi_qm_diff_regs_init(struct hisi_qm *qm,
struct dfx_diff_registers *dregs, int reg_len)
{
if (!qm || !dregs || reg_len <= 0)
return -EINVAL;
if (qm->fun_type != QM_HW_PF)
return 0;
qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs,
ARRAY_SIZE(qm_diff_regs));
if (IS_ERR(qm->debug.qm_diff_regs))
return PTR_ERR(qm->debug.qm_diff_regs);
qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
if (IS_ERR(qm->debug.acc_diff_regs)) {
dfx_regs_uninit(qm, qm->debug.qm_diff_regs,
ARRAY_SIZE(qm_diff_regs));
return PTR_ERR(qm->debug.acc_diff_regs);
}
return 0;
}
EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init);
/**
* hisi_qm_diff_regs_uninit() - Free memory for registers.
* @qm: device qm handle.
* @reg_len: diff registers region length.
*/
void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len)
{
if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF)
return;
dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
}
EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
/**
* hisi_qm_acc_diff_regs_dump() - Dump registers's value.
* @qm: device qm handle.
* @s: Debugfs file handle.
* @dregs: diff registers handle.
* @regs_len: diff registers region length.
*/
void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
struct dfx_diff_registers *dregs, int regs_len)
{
u32 j, val, base_offset;
int i, ret;
if (!qm || !s || !dregs || regs_len <= 0)
return;
ret = hisi_qm_get_dfx_access(qm);
if (ret)
return;
down_read(&qm->qps_lock);
for (i = 0; i < regs_len; i++) {
if (!dregs[i].reg_len)
continue;
for (j = 0; j < dregs[i].reg_len; j++) {
base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
val = readl(qm->io_base + base_offset);
if (val != dregs[i].regs[j])
seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
base_offset, dregs[i].regs[j], val);
}
}
up_read(&qm->qps_lock);
hisi_qm_put_dfx_access(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
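
Taken together, the three exported helpers form a small lifecycle for per-driver "diff_regs" debugfs files. The sketch below shows the expected wiring in an accelerator driver; my_diff_regs, its register offsets and the my_* functions are illustrative names only, and the hpre and sec hunks elsewhere in this merge (as well as qm_diff_regs_show() just after this) follow the same pattern.

/* Sketch only: my_diff_regs and the my_* helpers are illustrative names. */
static struct dfx_diff_registers my_diff_regs[] = {
	{ .reg_offset = 0x301000, .reg_len = 0x40 },	/* hypothetical region */
};

static int my_diff_regs_show(struct seq_file *s, void *unused)
{
	struct hisi_qm *qm = s->private;

	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
				   ARRAY_SIZE(my_diff_regs));
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(my_diff_regs);

static int my_debugfs_init(struct hisi_qm *qm)
{
	int ret;

	/* snapshot the registers once at init time */
	ret = hisi_qm_diff_regs_init(qm, my_diff_regs, ARRAY_SIZE(my_diff_regs));
	if (ret)
		return ret;

	debugfs_create_file("diff_regs", 0444, qm->debug.debug_root,
			    qm, &my_diff_regs_fops);
	return 0;
}

static void my_debugfs_exit(struct hisi_qm *qm)
{
	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(my_diff_regs));
	debugfs_remove_recursive(qm->debug.debug_root);
}
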
static int qm_diff_regs_show(struct seq_file *s, void *unused)
{
struct hisi_qm *qm = s->private;
hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
ARRAY_SIZE(qm_diff_regs));
return 0;
}
DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
size_t count, loff_t *pos)
{
@ -2660,7 +2835,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
* return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating
* qp memory fails.
*/
struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
struct hisi_qp *qp;
int ret;
@ -2678,7 +2853,6 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
return qp;
}
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
/**
* hisi_qm_release_qp() - Release a qp back to its qm.
@ -2686,7 +2860,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
*
* This function releases the resource of a qp.
*/
void hisi_qm_release_qp(struct hisi_qp *qp)
static void hisi_qm_release_qp(struct hisi_qp *qp)
{
struct hisi_qm *qm = qp->qm;
@ -2704,7 +2878,6 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
qm_pm_put_sync(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
{
@ -3053,9 +3226,17 @@ static void qm_qp_event_notifier(struct hisi_qp *qp)
wake_up_interruptible(&qp->uacce_q->wait);
}
/* This function returns free number of qp in qm. */
static int hisi_qm_get_available_instances(struct uacce_device *uacce)
{
return hisi_qm_get_free_qp_num(uacce->priv);
struct hisi_qm *qm = uacce->priv;
int ret;
down_read(&qm->qps_lock);
ret = qm->qp_num - qm->qp_in_used;
up_read(&qm->qps_lock);
return ret;
}
static void hisi_qm_set_hw_reset(struct hisi_qm *qm, int offset)
@ -3367,24 +3548,6 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
}
EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);
/**
* hisi_qm_get_free_qp_num() - Get free number of qp in qm.
* @qm: The qm which want to get free qp.
*
* This function return free number of qp in qm.
*/
int hisi_qm_get_free_qp_num(struct hisi_qm *qm)
{
int ret;
down_read(&qm->qps_lock);
ret = qm->qp_num - qm->qp_in_used;
up_read(&qm->qps_lock);
return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
{
struct device *dev = &qm->pdev->dev;
@ -3498,6 +3661,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
writel(state, qm->io_base + QM_VF_STATE);
}
static void qm_last_regs_uninit(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
return;
kfree(debug->qm_last_words);
debug->qm_last_words = NULL;
}
/**
* hisi_qm_uninit() - Uninitialize qm.
* @qm: The qm needed uninit.
@ -3509,6 +3683,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
qm_last_regs_uninit(qm);
qm_cmd_uninit(qm);
kfree(qm->factor);
down_write(&qm->qps_lock);
@ -3550,7 +3726,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_uninit);
*
* qm hw v1 does not support this interface.
*/
int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
static int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
{
if (!base || !number)
return -EINVAL;
@ -3562,7 +3738,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
return qm->ops->get_vft(qm, base, number);
}
EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
/**
* hisi_qm_set_vft() - Set vft to a qm.
@ -4484,6 +4659,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
*/
void hisi_qm_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
struct qm_dfx *dfx = &qm->debug.dfx;
struct dentry *qm_d;
void *data;
@ -4499,6 +4675,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
qm_create_debugfs_file(qm, qm->debug.qm_d, i);
}
if (qm_regs)
debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
qm, &qm_diff_regs_fops);
debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
@ -5181,6 +5361,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
return 0;
}
static void qm_show_last_dfx_regs(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
struct pci_dev *pdev = qm->pdev;
u32 val;
int i;
if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
return;
for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
if (debug->qm_last_words[i] != val)
pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
qm_dfx_regs[i].name, debug->qm_last_words[i], val);
}
}
static int qm_controller_reset(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@ -5196,6 +5394,10 @@ static int qm_controller_reset(struct hisi_qm *qm)
return ret;
}
qm_show_last_dfx_regs(qm);
if (qm->err_ini->show_last_dfx_regs)
qm->err_ini->show_last_dfx_regs(qm);
ret = qm_soft_reset(qm);
if (ret) {
pci_err(pdev, "Controller reset failed (%d)\n", ret);
@ -5906,6 +6108,26 @@ static int hisi_qm_memory_init(struct hisi_qm *qm)
return ret;
}
static void qm_last_regs_init(struct hisi_qm *qm)
{
int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
struct qm_debug *debug = &qm->debug;
int i;
if (qm->fun_type == QM_HW_VF)
return;
debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int),
GFP_KERNEL);
if (!debug->qm_last_words)
return;
for (i = 0; i < dfx_regs_num; i++) {
debug->qm_last_words[i] = readl_relaxed(qm->io_base +
qm_dfx_regs[i].offset);
}
}
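
The two hunks above add a snapshot-and-compare scheme for the QM DFX registers: qm_last_regs_init() records every register once at init time, and qm_show_last_dfx_regs() later reports only the registers whose values changed by the time a controller reset is attempted. Below is a minimal standalone sketch of the same idea, with a stubbed register read standing in for readl_relaxed() and illustrative register names and offsets that are not taken from the driver:

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct dfx_reg {
	const char *name;
	uint32_t offset;
};

/* Illustrative names and offsets, not the driver's qm_dfx_regs[] table. */
static const struct dfx_reg dfx_regs[] = {
	{ "QM_DFX_REG_A", 0x100 },
	{ "QM_DFX_REG_B", 0x104 },
	{ "QM_DFX_REG_C", 0x108 },
};
#define NR_REGS (sizeof(dfx_regs) / sizeof(dfx_regs[0]))

/* Stand-in for readl_relaxed(io_base + offset). */
static uint32_t read_reg(uint32_t offset)
{
	return (uint32_t)rand() ^ offset;
}

/* Mirrors qm_last_regs_init(): capture the current values once. */
static uint32_t *snapshot_regs(void)
{
	uint32_t *last = calloc(NR_REGS, sizeof(*last));

	if (!last)
		return NULL;
	for (size_t i = 0; i < NR_REGS; i++)
		last[i] = read_reg(dfx_regs[i].offset);
	return last;
}

/* Mirrors qm_show_last_dfx_regs(): print only what changed since then. */
static void show_changed_regs(const uint32_t *last)
{
	for (size_t i = 0; i < NR_REGS; i++) {
		uint32_t val = read_reg(dfx_regs[i].offset);

		if (val != last[i])
			printf("%s = 0x%08x => 0x%08x\n",
			       dfx_regs[i].name, last[i], val);
	}
}

int main(void)
{
	uint32_t *last = snapshot_regs();

	if (!last)
		return 1;
	show_changed_regs(last);	/* would run from the reset path */
	free(last);
	return 0;
}
```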
/**
* hisi_qm_init() - Initialize configures about qm.
* @qm: The qm needing init.
@ -5958,6 +6180,8 @@ int hisi_qm_init(struct hisi_qm *qm)
qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);
qm_last_regs_init(qm);
return 0;
err_alloc_uacce:

View File

@ -2113,7 +2113,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
.cra_flags = CRYPTO_ALG_ASYNC |\
CRYPTO_ALG_ALLOCATES_MEMORY |\
CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\
@ -2366,7 +2365,6 @@ static int sec_aead_decrypt(struct aead_request *a_req)
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
.cra_flags = CRYPTO_ALG_ASYNC |\
CRYPTO_ALG_ALLOCATES_MEMORY |\
CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\

View File

@ -110,6 +110,15 @@
#define SEC_SQE_MASK_LEN 48
#define SEC_SHAPER_TYPE_RATE 400
#define SEC_DFX_BASE 0x301000
#define SEC_DFX_CORE 0x302100
#define SEC_DFX_COMMON1 0x301600
#define SEC_DFX_COMMON2 0x301C00
#define SEC_DFX_BASE_LEN 0x9D
#define SEC_DFX_CORE_LEN 0x32B
#define SEC_DFX_COMMON1_LEN 0x45
#define SEC_DFX_COMMON2_LEN 0xBA
struct sec_hw_error {
u32 int_msk;
const char *msg;
@ -226,6 +235,34 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
{"SEC_BD_SAA8 ", 0x301C40},
};
/* define the SEC's dfx regs region and region length */
static struct dfx_diff_registers sec_diff_regs[] = {
{
.reg_offset = SEC_DFX_BASE,
.reg_len = SEC_DFX_BASE_LEN,
}, {
.reg_offset = SEC_DFX_COMMON1,
.reg_len = SEC_DFX_COMMON1_LEN,
}, {
.reg_offset = SEC_DFX_COMMON2,
.reg_len = SEC_DFX_COMMON2_LEN,
}, {
.reg_offset = SEC_DFX_CORE,
.reg_len = SEC_DFX_CORE_LEN,
},
};
static int sec_diff_regs_show(struct seq_file *s, void *unused)
{
struct hisi_qm *qm = s->private;
hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
ARRAY_SIZE(sec_diff_regs));
return 0;
}
DEFINE_SHOW_ATTRIBUTE(sec_diff_regs);
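
The diff_regs support added here is driven by a table of {reg_offset, reg_len} regions handed to hisi_qm_diff_regs_init() and dumped through the new debugfs file. A hedged, standalone sketch of how such a region table can be enumerated follows; the assumption that reg_len counts 32-bit registers is mine and is not stated in this diff:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct dfx_diff_registers {
	uint32_t reg_offset;	/* start of the region in the device BAR */
	uint32_t reg_len;	/* assumed here: number of 32-bit registers */
};

/* Values mirror SEC_DFX_BASE/COMMON1/COMMON2/CORE and their _LEN macros. */
static const struct dfx_diff_registers regions[] = {
	{ 0x301000, 0x9D },
	{ 0x301600, 0x45 },
	{ 0x301C00, 0xBA },
	{ 0x302100, 0x32B },
};

int main(void)
{
	/* Walk every register address covered by the regions. */
	for (size_t r = 0; r < sizeof(regions) / sizeof(regions[0]); r++)
		for (uint32_t i = 0; i < regions[r].reg_len; i++)
			printf("region %zu, reg %u -> offset 0x%x\n",
			       r, i, regions[r].reg_offset + i * 4);
	return 0;
}
```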
static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
{
return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
@ -729,6 +766,7 @@ DEFINE_SHOW_ATTRIBUTE(sec_regs);
static int sec_core_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *sec_regs = qm->debug.acc_diff_regs;
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
struct device *dev = &qm->pdev->dev;
struct sec_dfx *dfx = &sec->debug.dfx;
@ -749,6 +787,9 @@ static int sec_core_debug_init(struct hisi_qm *qm)
if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF)
debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
if (qm->fun_type == QM_HW_PF && sec_regs)
debugfs_create_file("diff_regs", 0444, tmp_d,
qm, &sec_diff_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@ -790,6 +831,14 @@ static int sec_debugfs_init(struct hisi_qm *qm)
sec_debugfs_root);
qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
ret = hisi_qm_diff_regs_init(qm, sec_diff_regs,
ARRAY_SIZE(sec_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init SEC diff regs!\n");
goto debugfs_remove;
}
hisi_qm_debug_init(qm);
ret = sec_debug_init(qm);
@ -799,15 +848,66 @@ static int sec_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
debugfs_remove:
debugfs_remove_recursive(sec_debugfs_root);
return ret;
}
static void sec_debugfs_exit(struct hisi_qm *qm)
{
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
}
static int sec_show_last_regs_init(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
int i;
debug->last_words = kcalloc(ARRAY_SIZE(sec_dfx_regs),
sizeof(unsigned int), GFP_KERNEL);
if (!debug->last_words)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++)
debug->last_words[i] = readl_relaxed(qm->io_base +
sec_dfx_regs[i].offset);
return 0;
}
static void sec_show_last_regs_uninit(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
kfree(debug->last_words);
debug->last_words = NULL;
}
static void sec_show_last_dfx_regs(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
struct pci_dev *pdev = qm->pdev;
u32 val;
int i;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
/* dumps last word of the debugging registers during controller reset */
for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) {
val = readl_relaxed(qm->io_base + sec_dfx_regs[i].offset);
if (val != debug->last_words[i])
pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
sec_dfx_regs[i].name, debug->last_words[i], val);
}
}
static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct sec_hw_error *errs = sec_hw_errors;
@ -874,6 +974,7 @@ static const struct hisi_qm_err_ini sec_err_ini = {
.open_axi_master_ooo = sec_open_axi_master_ooo,
.open_sva_prefetch = sec_open_sva_prefetch,
.close_sva_prefetch = sec_close_sva_prefetch,
.show_last_dfx_regs = sec_show_last_dfx_regs,
.err_info_init = sec_err_info_init,
};
@ -892,8 +993,11 @@ static int sec_pf_probe_init(struct sec_dev *sec)
sec_open_sva_prefetch(qm);
hisi_qm_dev_err_init(qm);
sec_debug_regs_clear(qm);
ret = sec_show_last_regs_init(qm);
if (ret)
pci_err(qm->pdev, "Failed to init last word regs!\n");
return 0;
return ret;
}
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@ -1067,6 +1171,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
err_probe_uninit:
sec_show_last_regs_uninit(qm);
sec_probe_uninit(qm);
err_qm_uninit:
sec_qm_uninit(qm);
@ -1091,6 +1196,7 @@ static void sec_remove(struct pci_dev *pdev)
if (qm->fun_type == QM_HW_PF)
sec_debug_regs_clear(qm);
sec_show_last_regs_uninit(qm);
sec_probe_uninit(qm);

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
#include <linux/align.h>
#include <linux/dma-mapping.h>
#include <linux/hisi_acc_qm.h>
#include <linux/module.h>
@ -64,8 +65,9 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
return ERR_PTR(-EINVAL);
sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
sizeof(struct hisi_acc_hw_sgl);
sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
sizeof(struct hisi_acc_hw_sgl),
HISI_ACC_SGL_ALIGN_SIZE);
/*
* the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1),

View File

@ -521,7 +521,7 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx,
static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
{
hisi_qm_stop_qp(ctx->qp);
hisi_qm_release_qp(ctx->qp);
hisi_qm_free_qps(&ctx->qp, 1);
}
static const struct hisi_zip_sqe_ops hisi_zip_ops_v1 = {

View File

@ -49,14 +49,18 @@
#define HZIP_QM_IDEL_STATUS 0x3040e4
#define HZIP_CORE_DEBUG_COMP_0 0x302000
#define HZIP_CORE_DEBUG_COMP_1 0x303000
#define HZIP_CORE_DEBUG_DECOMP_0 0x304000
#define HZIP_CORE_DEBUG_DECOMP_1 0x305000
#define HZIP_CORE_DEBUG_DECOMP_2 0x306000
#define HZIP_CORE_DEBUG_DECOMP_3 0x307000
#define HZIP_CORE_DEBUG_DECOMP_4 0x308000
#define HZIP_CORE_DEBUG_DECOMP_5 0x309000
#define HZIP_CORE_DFX_BASE 0x301000
#define HZIP_CLOCK_GATED_CONTL 0X301004
#define HZIP_CORE_DFX_COMP_0 0x302000
#define HZIP_CORE_DFX_COMP_1 0x303000
#define HZIP_CORE_DFX_DECOMP_0 0x304000
#define HZIP_CORE_DFX_DECOMP_1 0x305000
#define HZIP_CORE_DFX_DECOMP_2 0x306000
#define HZIP_CORE_DFX_DECOMP_3 0x307000
#define HZIP_CORE_DFX_DECOMP_4 0x308000
#define HZIP_CORE_DFX_DECOMP_5 0x309000
#define HZIP_CORE_REGS_BASE_LEN 0xB0
#define HZIP_CORE_REGS_DFX_LEN 0x28
#define HZIP_CORE_INT_SOURCE 0x3010A0
#define HZIP_CORE_INT_MASK_REG 0x3010A4
@ -230,6 +234,64 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = {
{"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull},
};
static const struct debugfs_reg32 hzip_com_dfx_regs[] = {
{"HZIP_CLOCK_GATE_CTRL ", 0x301004},
{"HZIP_CORE_INT_RAS_CE_ENB ", 0x301160},
{"HZIP_CORE_INT_RAS_NFE_ENB ", 0x301164},
{"HZIP_CORE_INT_RAS_FE_ENB ", 0x301168},
{"HZIP_UNCOM_ERR_RAS_CTRL ", 0x30116C},
};
static const struct debugfs_reg32 hzip_dump_dfx_regs[] = {
{"HZIP_GET_BD_NUM ", 0x00ull},
{"HZIP_GET_RIGHT_BD ", 0x04ull},
{"HZIP_GET_ERROR_BD ", 0x08ull},
{"HZIP_DONE_BD_NUM ", 0x0cull},
{"HZIP_MAX_DELAY ", 0x20ull},
};
/* define the ZIP's dfx regs region and region length */
static struct dfx_diff_registers hzip_diff_regs[] = {
{
.reg_offset = HZIP_CORE_DFX_BASE,
.reg_len = HZIP_CORE_REGS_BASE_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_COMP_0,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_COMP_1,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_0,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_1,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_2,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_3,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_4,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
}, {
.reg_offset = HZIP_CORE_DFX_DECOMP_5,
.reg_len = HZIP_CORE_REGS_DFX_LEN,
},
};
static int hzip_diff_regs_show(struct seq_file *s, void *unused)
{
struct hisi_qm *qm = s->private;
hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
ARRAY_SIZE(hzip_diff_regs));
return 0;
}
DEFINE_SHOW_ATTRIBUTE(hzip_diff_regs);
static const struct kernel_param_ops zip_uacce_mode_ops = {
.set = uacce_mode_set,
.get = param_get_int,
@ -621,6 +683,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *hzip_regs = qm->debug.acc_diff_regs;
struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm);
struct hisi_zip_dfx *dfx = &zip->dfx;
struct dentry *tmp_dir;
@ -634,6 +697,10 @@ static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
0644, tmp_dir, data,
&zip_atomic64_ops);
}
if (qm->fun_type == QM_HW_PF && hzip_regs)
debugfs_create_file("diff_regs", 0444, tmp_dir,
qm, &hzip_diff_regs_fops);
}
static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm)
@ -666,6 +733,13 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
qm->debug.debug_root = dev_d;
ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs,
ARRAY_SIZE(hzip_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init ZIP diff regs!\n");
goto debugfs_remove;
}
hisi_qm_debug_init(qm);
if (qm->fun_type == QM_HW_PF) {
@ -679,6 +753,8 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
debugfs_remove:
debugfs_remove_recursive(hzip_debugfs_root);
return ret;
}
@ -703,6 +779,8 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
{
hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
if (qm->fun_type == QM_HW_PF) {
@ -711,6 +789,87 @@ static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
}
}
static int hisi_zip_show_last_regs_init(struct hisi_qm *qm)
{
int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs);
int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
struct qm_debug *debug = &qm->debug;
void __iomem *io_base;
int i, j, idx;
debug->last_words = kcalloc(core_dfx_regs_num * HZIP_CORE_NUM +
com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
if (!debug->last_words)
return -ENOMEM;
for (i = 0; i < com_dfx_regs_num; i++) {
io_base = qm->io_base + hzip_com_dfx_regs[i].offset;
debug->last_words[i] = readl_relaxed(io_base);
}
for (i = 0; i < HZIP_CORE_NUM; i++) {
io_base = qm->io_base + core_offsets[i];
for (j = 0; j < core_dfx_regs_num; j++) {
idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
debug->last_words[idx] = readl_relaxed(
io_base + hzip_dump_dfx_regs[j].offset);
}
}
return 0;
}
static void hisi_zip_show_last_regs_uninit(struct hisi_qm *qm)
{
struct qm_debug *debug = &qm->debug;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
kfree(debug->last_words);
debug->last_words = NULL;
}
static void hisi_zip_show_last_dfx_regs(struct hisi_qm *qm)
{
int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs);
int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
struct qm_debug *debug = &qm->debug;
char buf[HZIP_BUF_SIZE];
void __iomem *base;
int i, j, idx;
u32 val;
if (qm->fun_type == QM_HW_VF || !debug->last_words)
return;
for (i = 0; i < com_dfx_regs_num; i++) {
val = readl_relaxed(qm->io_base + hzip_com_dfx_regs[i].offset);
if (debug->last_words[i] != val)
pci_info(qm->pdev, "com_dfx: %s \t= 0x%08x => 0x%08x\n",
hzip_com_dfx_regs[i].name, debug->last_words[i], val);
}
for (i = 0; i < HZIP_CORE_NUM; i++) {
if (i < HZIP_COMP_CORE_NUM)
scnprintf(buf, sizeof(buf), "Comp_core-%d", i);
else
scnprintf(buf, sizeof(buf), "Decomp_core-%d",
i - HZIP_COMP_CORE_NUM);
base = qm->io_base + core_offsets[i];
pci_info(qm->pdev, "==>%s:\n", buf);
/* dump last word for dfx regs during control resetting */
for (j = 0; j < core_dfx_regs_num; j++) {
idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
val = readl_relaxed(base + hzip_dump_dfx_regs[j].offset);
if (debug->last_words[idx] != val)
pci_info(qm->pdev, "%s \t= 0x%08x => 0x%08x\n",
hzip_dump_dfx_regs[j].name, debug->last_words[idx], val);
}
}
}
static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct hisi_zip_hw_error *err = zip_hw_error;
@ -798,6 +957,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
.close_axi_master_ooo = hisi_zip_close_axi_master_ooo,
.open_sva_prefetch = hisi_zip_open_sva_prefetch,
.close_sva_prefetch = hisi_zip_close_sva_prefetch,
.show_last_dfx_regs = hisi_zip_show_last_dfx_regs,
.err_info_init = hisi_zip_err_info_init,
};
@ -805,6 +965,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
{
struct hisi_qm *qm = &hisi_zip->qm;
struct hisi_zip_ctrl *ctrl;
int ret;
ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@ -820,7 +981,11 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
hisi_qm_dev_err_init(qm);
hisi_zip_debug_regs_clear(qm);
return 0;
ret = hisi_zip_show_last_regs_init(qm);
if (ret)
pci_err(qm->pdev, "Failed to init last word regs!\n");
return ret;
}
static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@ -964,6 +1129,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
hisi_qm_stop(qm, QM_NORMAL);
err_dev_err_uninit:
hisi_zip_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
err_qm_uninit:
@ -985,6 +1151,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
hisi_zip_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
hisi_zip_qm_uninit(qm);
}

View File

@ -1997,3 +1997,12 @@ MODULE_AUTHOR("Igal Liberman <igall@marvell.com>");
MODULE_DESCRIPTION("Support for SafeXcel cryptographic engines: EIP97 & EIP197");
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
MODULE_FIRMWARE("ifpp.bin");
MODULE_FIRMWARE("ipue.bin");
MODULE_FIRMWARE("inside-secure/eip197b/ifpp.bin");
MODULE_FIRMWARE("inside-secure/eip197b/ipue.bin");
MODULE_FIRMWARE("inside-secure/eip197d/ifpp.bin");
MODULE_FIRMWARE("inside-secure/eip197d/ipue.bin");
MODULE_FIRMWARE("inside-secure/eip197_minifw/ifpp.bin");
MODULE_FIRMWARE("inside-secure/eip197_minifw/ipue.bin");

View File

@ -1598,7 +1598,6 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ocs_aes_dev *aes_dev;
struct resource *aes_mem;
int rc;
aes_dev = devm_kzalloc(dev, sizeof(*aes_dev), GFP_KERNEL);
@ -1616,13 +1615,7 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev)
}
/* Get base register address. */
aes_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!aes_mem) {
dev_err(dev, "Could not retrieve io mem resource\n");
return -ENODEV;
}
aes_dev->base_reg = devm_ioremap_resource(&pdev->dev, aes_mem);
aes_dev->base_reg = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(aes_dev->base_reg))
return PTR_ERR(aes_dev->base_reg);
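
The hunk above replaces the open-coded platform_get_resource() + devm_ioremap_resource() pair with devm_platform_ioremap_resource(), which performs the resource lookup, the ioremap and the error reporting in one call. A minimal sketch of the idiom as a hypothetical platform driver (names are illustrative; this is not the keembay code):

```c
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/err.h>
#include <linux/io.h>

static int demo_probe(struct platform_device *pdev)
{
	void __iomem *base;

	/* Looks up IORESOURCE_MEM index 0, maps it and logs errors itself. */
	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);

	/* ... program the block through 'base' ... */
	return 0;
}

static struct platform_driver demo_driver = {
	.probe	= demo_probe,
	.driver	= {
		.name = "demo-mmio",
	},
};
module_platform_driver(demo_driver);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Sketch of the devm_platform_ioremap_resource() idiom");
```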

View File

@ -624,7 +624,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = {
.decrypt = mv_cesa_ecb_des3_ede_decrypt,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
.base = {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "mv-ecb-des3-ede",

View File

@ -896,7 +896,6 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
struct crypto_authenc_key_param *param;
int enckeylen = 0, authkeylen = 0;
struct rtattr *rta = (void *)key;
int status;
if (!RTA_OK(rta, keylen))
return -EINVAL;
@ -938,11 +937,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
ctx->enc_key_len = enckeylen;
ctx->auth_key_len = authkeylen;
status = aead_hmac_init(cipher);
if (status)
return status;
return 0;
return aead_hmac_init(cipher);
}
static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher,

View File

@ -14,6 +14,7 @@
static const struct pci_device_id adf_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), },
{ PCI_VDEVICE(INTEL, ADF_401XX_PCI_DEVICE_ID), },
{ }
};
MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
@ -75,13 +76,6 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
if (ret)
goto err;
/* Temporarily set the number of crypto instances to zero to avoid
* registering the crypto algorithms.
* This will be removed when the algorithms will support the
* CRYPTO_TFM_REQ_MAY_BACKLOG flag
*/
instances = 0;
for (i = 0; i < instances; i++) {
val = i;
bank = i * 2;

View File

@ -78,19 +78,6 @@ static const u32 *adf_get_arbiter_mapping(void)
return thrd_to_arb_map;
}
static void adf_enable_ints(struct adf_accel_dev *accel_dev)
{
void __iomem *addr;
addr = (&GET_BARS(accel_dev)[ADF_C3XXX_PMISC_BAR])->virt_addr;
/* Enable bundle and misc interrupts */
ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF0_MASK_OFFSET,
ADF_C3XXX_SMIA0_MASK);
ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF1_MASK_OFFSET,
ADF_C3XXX_SMIA1_MASK);
}
static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable)
{
adf_gen2_cfg_iov_thds(accel_dev, enable,
@ -133,7 +120,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_ints = adf_gen2_enable_ints;
hw_data->reset_device = adf_reset_flr;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->disable_iov = adf_disable_sriov;

View File

@ -13,10 +13,6 @@
#define ADF_C3XXX_ACCELERATORS_MASK 0x7
#define ADF_C3XXX_ACCELENGINES_MASK 0x3F
#define ADF_C3XXX_ETR_MAX_BANKS 16
#define ADF_C3XXX_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
#define ADF_C3XXX_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
#define ADF_C3XXX_SMIA0_MASK 0xFFFF
#define ADF_C3XXX_SMIA1_MASK 0x1
#define ADF_C3XXX_SOFTSTRAP_CSR_OFFSET 0x2EC
/* AE to function mapping */

View File

@ -80,19 +80,6 @@ static const u32 *adf_get_arbiter_mapping(void)
return thrd_to_arb_map;
}
static void adf_enable_ints(struct adf_accel_dev *accel_dev)
{
void __iomem *addr;
addr = (&GET_BARS(accel_dev)[ADF_C62X_PMISC_BAR])->virt_addr;
/* Enable bundle and misc interrupts */
ADF_CSR_WR(addr, ADF_C62X_SMIAPF0_MASK_OFFSET,
ADF_C62X_SMIA0_MASK);
ADF_CSR_WR(addr, ADF_C62X_SMIAPF1_MASK_OFFSET,
ADF_C62X_SMIA1_MASK);
}
static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable)
{
adf_gen2_cfg_iov_thds(accel_dev, enable,
@ -135,7 +122,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
hw_data->enable_ints = adf_enable_ints;
hw_data->enable_ints = adf_gen2_enable_ints;
hw_data->reset_device = adf_reset_flr;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->disable_iov = adf_disable_sriov;

View File

@ -13,10 +13,6 @@
#define ADF_C62X_ACCELERATORS_MASK 0x1F
#define ADF_C62X_ACCELENGINES_MASK 0x3FF
#define ADF_C62X_ETR_MAX_BANKS 16
#define ADF_C62X_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
#define ADF_C62X_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
#define ADF_C62X_SMIA0_MASK 0xFFFF
#define ADF_C62X_SMIA1_MASK 0x1
#define ADF_C62X_SOFTSTRAP_CSR_OFFSET 0x2EC
/* AE to function mapping */

View File

@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \
qat_crypto.o \
qat_algs.o \
qat_asym_algs.o \
qat_algs_send.o \
qat_uclo.o \
qat_hal.o

View File

@ -19,6 +19,8 @@
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
#define ADF_401XX_PCI_DEVICE_ID 0x4942
#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@ -152,9 +154,9 @@ struct adf_pfvf_ops {
int (*enable_comms)(struct adf_accel_dev *accel_dev);
u32 (*get_pf2vf_offset)(u32 i);
u32 (*get_vf2pf_offset)(u32 i);
u32 (*get_vf2pf_sources)(void __iomem *pmisc_addr);
void (*enable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask);
void (*disable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask);
void (*disable_all_vf2pf_interrupts)(void __iomem *pmisc_addr);
u32 (*disable_pending_vf2pf_interrupts)(void __iomem *pmisc_addr);
int (*send_msg)(struct adf_accel_dev *accel_dev, struct pfvf_message msg,
u32 pfvf_offset, struct mutex *csr_lock);
struct pfvf_message (*recv_msg)(struct adf_accel_dev *accel_dev,

View File

@ -195,10 +195,8 @@ bool adf_misc_wq_queue_work(struct work_struct *work);
#if defined(CONFIG_PCI_IOV)
int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
u32 vf_mask);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u32 vf_nr);
int adf_pf2vf_handle_pf_restarting(struct adf_accel_dev *accel_dev);
@ -217,14 +215,6 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
}
static inline void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
}
static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
{
}
static inline int adf_init_pf_wq(void)
{
return 0;
@ -243,10 +233,6 @@ static inline void adf_exit_vf_wq(void)
{
}
static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
{
}
#endif
static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev)

View File

@ -98,6 +98,19 @@ void adf_gen2_get_arb_info(struct arb_info *arb_info)
}
EXPORT_SYMBOL_GPL(adf_gen2_get_arb_info);
void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev)
{
void __iomem *addr = adf_get_pmisc_base(accel_dev);
u32 val;
val = accel_dev->pf.vf_info ? 0 : BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1;
/* Enable bundle and misc interrupts */
ADF_CSR_WR(addr, ADF_GEN2_SMIAPF0_MASK_OFFSET, val);
ADF_CSR_WR(addr, ADF_GEN2_SMIAPF1_MASK_OFFSET, ADF_GEN2_SMIA1_MASK);
}
EXPORT_SYMBOL_GPL(adf_gen2_enable_ints);
static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
{
return BUILD_RING_BASE_ADDR(addr, size);

View File

@ -145,6 +145,11 @@ do { \
#define ADF_GEN2_CERRSSMSH(i) ((i) * 0x4000 + 0x10)
#define ADF_GEN2_ERRSSMSH_EN BIT(3)
/* Interrupts */
#define ADF_GEN2_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
#define ADF_GEN2_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
#define ADF_GEN2_SMIA1_MASK 0x1
u32 adf_gen2_get_num_accels(struct adf_hw_device_data *self);
u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self);
void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev);
@ -153,6 +158,7 @@ void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable,
void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info);
void adf_gen2_get_arb_info(struct arb_info *arb_info);
void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev);
u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev);
void adf_gen2_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);

View File

@ -13,8 +13,9 @@
#include "adf_pfvf_utils.h"
/* VF2PF interrupts */
#define ADF_GEN2_VF_MSK 0xFFFF
#define ADF_GEN2_ERR_REG_VF2PF(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & 0xFFFF) << 9)
#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & ADF_GEN2_VF_MSK) << 9)
#define ADF_GEN2_PF_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
#define ADF_GEN2_VF_PF2VF_OFFSET 0x200
@ -50,43 +51,60 @@ static u32 adf_gen2_vf_get_pfvf_offset(u32 i)
return ADF_GEN2_VF_PF2VF_OFFSET;
}
static u32 adf_gen2_get_vf2pf_sources(void __iomem *pmisc_addr)
{
u32 errsou3, errmsk3, vf_int_mask;
/* Get the interrupt sources triggered by VFs */
errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3);
vf_int_mask = ADF_GEN2_ERR_REG_VF2PF(errsou3);
/* To avoid adding duplicate entries to work queue, clear
* vf_int_mask_sets bits that are already masked in ERRMSK register.
*/
errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3);
vf_int_mask &= ~ADF_GEN2_ERR_REG_VF2PF(errmsk3);
return vf_int_mask;
}
static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
{
/* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */
if (vf_mask & 0xFFFF) {
if (vf_mask & ADF_GEN2_VF_MSK) {
u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
& ~ADF_GEN2_ERR_MSK_VF2PF(vf_mask);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
}
}
static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
static void adf_gen2_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr)
{
/* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */
if (vf_mask & 0xFFFF) {
u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
| ADF_GEN2_ERR_MSK_VF2PF(vf_mask);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
}
u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
| ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
}
static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr)
{
u32 sources, disabled, pending;
u32 errsou3, errmsk3;
/* Get the interrupt sources triggered by VFs */
errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3);
sources = ADF_GEN2_ERR_REG_VF2PF(errsou3);
if (!sources)
return 0;
/* Get the already disabled interrupts */
errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3);
disabled = ADF_GEN2_ERR_REG_VF2PF(errmsk3);
pending = sources & ~disabled;
if (!pending)
return 0;
/* Due to HW limitations, when disabling the interrupts, we can't
* just disable the requested sources, as this would lead to missed
* interrupts if ERRSOU3 changes just before writing to ERRMSK3.
* To work around it, disable all and re-enable only the sources that
* are not in vf_mask and were not already disabled. Re-enabling will
* trigger a new interrupt for the sources that have changed in the
* meantime, if any.
*/
errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
errmsk3 &= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
/* Return the sources of the (new) interrupt(s) */
return pending;
}
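
This GEN2 hunk (and the matching GEN4 one further down) folds "read the pending sources" and "mask them" into a single disable_pending_vf2pf_interrupts() step, and the comment explains why: masking only the bits that were read can lose an interrupt that fires between the ERRSOU3 read and the ERRMSK3 write, so the code masks everything and then unmasks only the VFs that were neither pending nor already masked. A standalone model of that bit manipulation over a 16-bit VF mask (register shifts and CSR accessors left out):

```c
#include <stdio.h>
#include <stdint.h>

/* Simulated registers: a set bit in 'sou' means the VF raised an interrupt,
 * a set bit in 'msk' means that VF's interrupt is currently masked. */
static uint16_t sou;
static uint16_t msk;

/* Simplified disable_pending_vf2pf_interrupts(): returns the VFs that were
 * pending and not yet masked, and leaves sources|disabled masked. */
static uint16_t disable_pending(void)
{
	uint16_t sources = sou;
	uint16_t disabled = msk;
	uint16_t pending = sources & ~disabled;

	if (!pending)
		return 0;

	/* Two writes, mirroring the two CSR writes: mask everything, then
	 * unmask whatever was neither pending nor already masked, so a VF
	 * that fired in between will trigger again once it is unmasked. */
	msk = 0xFFFF;
	msk = sources | disabled;

	return pending;
}

int main(void)
{
	sou = 0x0014;	/* VF2 and VF4 raised an interrupt */
	msk = 0x0004;	/* VF2 was already masked */

	uint16_t pending = disable_pending();

	printf("pending=0x%04x msk=0x%04x\n", pending, msk);
	/* prints pending=0x0010 msk=0x0014 */
	return 0;
}
```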
static u32 gen2_csr_get_int_bit(enum gen2_csr_pos offset)
@ -362,9 +380,9 @@ void adf_gen2_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops)
pfvf_ops->enable_comms = adf_enable_pf2vf_comms;
pfvf_ops->get_pf2vf_offset = adf_gen2_pf_get_pfvf_offset;
pfvf_ops->get_vf2pf_offset = adf_gen2_pf_get_pfvf_offset;
pfvf_ops->get_vf2pf_sources = adf_gen2_get_vf2pf_sources;
pfvf_ops->enable_vf2pf_interrupts = adf_gen2_enable_vf2pf_interrupts;
pfvf_ops->disable_vf2pf_interrupts = adf_gen2_disable_vf2pf_interrupts;
pfvf_ops->disable_all_vf2pf_interrupts = adf_gen2_disable_all_vf2pf_interrupts;
pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen2_disable_pending_vf2pf_interrupts;
pfvf_ops->send_msg = adf_gen2_pf2vf_send;
pfvf_ops->recv_msg = adf_gen2_vf2pf_recv;
}

View File

@ -15,6 +15,7 @@
/* VF2PF interrupt source registers */
#define ADF_4XXX_VM2PF_SOU 0x41A180
#define ADF_4XXX_VM2PF_MSK 0x41A1C0
#define ADF_GEN4_VF_MSK 0xFFFF
#define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2
#define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F
@ -36,32 +37,48 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
return ADF_4XXX_VM2PF_OFFSET(i);
}
static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
{
u32 sou, mask;
sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
return sou & ~mask;
}
static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
{
unsigned int val;
u32 val;
val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
}
static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
u32 vf_mask)
static void adf_gen4_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr)
{
unsigned int val;
ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK);
}
val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask;
ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
static u32 adf_gen4_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr)
{
u32 sources, disabled, pending;
/* Get the interrupt sources triggered by VFs */
sources = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
if (!sources)
return 0;
/* Get the already disabled interrupts */
disabled = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
pending = sources & ~disabled;
if (!pending)
return 0;
/* Due to HW limitations, when disabling the interrupts, we can't
* just disable the requested sources, as this would lead to missed
* interrupts if VM2PF_SOU changes just before writing to VM2PF_MSK.
* To work around it, disable all and re-enable only the sources that
* are not in vf_mask and were not already disabled. Re-enabling will
* trigger a new interrupt for the sources that have changed in the
* meantime, if any.
*/
ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK);
ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, disabled | sources);
/* Return the sources of the (new) interrupt(s) */
return pending;
}
static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
@ -96,10 +113,16 @@ static struct pfvf_message adf_gen4_pfvf_recv(struct adf_accel_dev *accel_dev,
u32 pfvf_offset, u8 compat_ver)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
struct pfvf_message msg = { 0 };
u32 csr_val;
/* Read message from the CSR */
csr_val = ADF_CSR_RD(pmisc_addr, pfvf_offset);
if (!(csr_val & ADF_PFVF_INT)) {
dev_info(&GET_DEV(accel_dev),
"Spurious PFVF interrupt, msg 0x%.8x. Ignored\n", csr_val);
return msg;
}
/* We can now acknowledge the message reception by clearing the
* interrupt bit
@ -115,9 +138,9 @@ void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops)
pfvf_ops->enable_comms = adf_enable_pf2vf_comms;
pfvf_ops->get_pf2vf_offset = adf_gen4_pf_get_pf2vf_offset;
pfvf_ops->get_vf2pf_offset = adf_gen4_pf_get_vf2pf_offset;
pfvf_ops->get_vf2pf_sources = adf_gen4_get_vf2pf_sources;
pfvf_ops->enable_vf2pf_interrupts = adf_gen4_enable_vf2pf_interrupts;
pfvf_ops->disable_vf2pf_interrupts = adf_gen4_disable_vf2pf_interrupts;
pfvf_ops->disable_all_vf2pf_interrupts = adf_gen4_disable_all_vf2pf_interrupts;
pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen4_disable_pending_vf2pf_interrupts;
pfvf_ops->send_msg = adf_gen4_pfvf_send;
pfvf_ops->recv_msg = adf_gen4_pfvf_recv;
}

View File

@ -66,42 +66,39 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask);
GET_PFVF_OPS(accel_dev)->disable_all_vf2pf_interrupts(pmisc_addr);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
static void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
u32 vf_mask)
static u32 adf_disable_pending_vf2pf_interrupts(struct adf_accel_dev *accel_dev)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
u32 pending;
spin_lock(&accel_dev->pf.vf2pf_ints_lock);
GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask);
pending = GET_PFVF_OPS(accel_dev)->disable_pending_vf2pf_interrupts(pmisc_addr);
spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
return pending;
}
static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
bool irq_handled = false;
unsigned long vf_mask;
/* Get the interrupt sources triggered by VFs */
vf_mask = GET_PFVF_OPS(accel_dev)->get_vf2pf_sources(pmisc_addr);
/* Get the interrupt sources triggered by VFs, except for those already disabled */
vf_mask = adf_disable_pending_vf2pf_interrupts(accel_dev);
if (vf_mask) {
struct adf_accel_vf_info *vf_info;
int i;
/* Disable VF2PF interrupts for VFs with pending ints */
adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
/*
* Handle VF2PF interrupt unless the VF is malicious and
* is attempting to flood the host OS with VF2PF interrupts.

View File

@ -8,8 +8,8 @@
/*
* PF<->VF Gen2 Messaging format
*
* The PF has an array of 32-bit PF2VF registers, one for each VF. The
* PF can access all these registers; each VF can access only the one
* The PF has an array of 32-bit PF2VF registers, one for each VF. The
* PF can access all these registers while each VF can access only the one
* register associated with that particular VF.
*
* The register functionally is split into two parts:

View File

@ -154,7 +154,7 @@ static struct pfvf_message handle_blkmsg_req(struct adf_accel_vf_info *vf_info,
if (FIELD_GET(ADF_VF2PF_BLOCK_CRC_REQ_MASK, req.data)) {
dev_dbg(&GET_DEV(vf_info->accel_dev),
"BlockMsg of type %d for CRC over %d bytes received from VF%d\n",
blk_type, blk_byte, vf_info->vf_nr);
blk_type, blk_byte + 1, vf_info->vf_nr);
if (!adf_pf2vf_blkmsg_get_data(vf_info, blk_type, blk_byte,
byte_max, &resp_data,
@ -242,7 +242,9 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
"VersionRequest received from VF%d (vers %d) to PF (vers %d)\n",
vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
if (vf_compat_ver == 0)
compat = ADF_PF2VF_VF_INCOMPATIBLE;
else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
compat = ADF_PF2VF_VF_COMPATIBLE;
else
compat = ADF_PF2VF_VF_COMPAT_UNKNOWN;
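
The extra branch above stops a reported compatibility version of 0 from being treated as compatible: 0 now maps to "incompatible", anything up to the PF's own version stays "compatible", and anything newer remains "unknown" for the VF to judge. A standalone sketch of the three-way decision (the PF version value is illustrative):

```c
#include <stdio.h>

#define PFVF_COMPAT_THIS_VERSION 3	/* illustrative PF version */

enum compat { VF_COMPATIBLE, VF_INCOMPATIBLE, VF_COMPAT_UNKNOWN };

static enum compat classify_vf(unsigned int vf_compat_ver)
{
	if (vf_compat_ver == 0)
		return VF_INCOMPATIBLE;	/* version 0 is not a valid report */
	if (vf_compat_ver <= PFVF_COMPAT_THIS_VERSION)
		return VF_COMPATIBLE;
	return VF_COMPAT_UNKNOWN;	/* VF is newer than the PF */
}

int main(void)
{
	for (unsigned int v = 0; v <= 4; v++)
		printf("vf version %u -> %d\n", v, classify_vf(v));
	return 0;
}
```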

View File

@ -3,7 +3,6 @@
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include "adf_common_drv.h"
#include "adf_cfg.h"
#include "adf_pfvf_pf_msg.h"
@ -74,8 +73,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, true);
/* Enable VF to PF interrupts for all VFs */
if (hw_data->pfvf_ops.get_pf2vf_offset)
adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1);
adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1);
/*
* Due to the hardware design, when SR-IOV and the ring arbiter
@ -104,22 +102,18 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
if (!accel_dev->pf.vf_info)
return;
if (hw_data->pfvf_ops.get_pf2vf_offset)
adf_pf2vf_notify_restarting(accel_dev);
adf_pf2vf_notify_restarting(accel_dev);
pci_disable_sriov(accel_to_pci_dev(accel_dev));
/* Disable VF to PF interrupts */
if (hw_data->pfvf_ops.get_pf2vf_offset)
adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0));
adf_disable_all_vf2pf_interrupts(accel_dev);
/* Clear Valid bits in AE Thread to PCIe Function Mapping */
if (hw_data->configure_iov_threads)
hw_data->configure_iov_threads(accel_dev, false);
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++)
mutex_destroy(&vf->pf2vf_lock);
}
kfree(accel_dev->pf.vf_info);
accel_dev->pf.vf_info = NULL;
@ -176,7 +170,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
return -EFAULT;
}
if (!iommu_present(&pci_bus_type))
if (!device_iommu_mapped(&pdev->dev))
dev_warn(&pdev->dev, "IOMMU should be enabled for SR-IOV to work correctly\n");
if (accel_dev->pf.vf_info) {

View File

@ -8,6 +8,9 @@
#include "adf_cfg.h"
#include "adf_common_drv.h"
#define ADF_MAX_RING_THRESHOLD 80
#define ADF_PERCENT(tot, percent) (((tot) * (percent)) / 100)
static inline u32 adf_modulo(u32 data, u32 shift)
{
u32 div = data >> shift;
@ -77,6 +80,11 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring)
bank->irq_mask);
}
bool adf_ring_nearly_full(struct adf_etr_ring_data *ring)
{
return atomic_read(ring->inflights) > ring->threshold;
}
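
adf_ring_nearly_full() is the gate used by the new backlog code: a ring counts as nearly full once its in-flight counter passes a threshold that adf_create_ring(), in the hunk below, sets to 80% of the ring's capacity via ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD). A small standalone sketch of the arithmetic:

```c
#include <stdio.h>
#include <stdbool.h>

#define MAX_RING_THRESHOLD	80	/* percent */
#define PERCENT(tot, percent)	(((tot) * (percent)) / 100)

struct ring {
	int inflights;
	int threshold;
};

static bool ring_nearly_full(const struct ring *r)
{
	return r->inflights > r->threshold;
}

int main(void)
{
	/* Say the ring holds 256 requests: backlogging starts above 204. */
	struct ring r = {
		.inflights = 0,
		.threshold = PERCENT(256, MAX_RING_THRESHOLD),
	};

	r.inflights = 204;
	printf("204 in flight -> nearly full: %d\n", ring_nearly_full(&r));
	r.inflights = 205;
	printf("205 in flight -> nearly full: %d\n", ring_nearly_full(&r));
	return 0;
}
```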
int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg)
{
struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev);
@ -217,6 +225,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
struct adf_etr_bank_data *bank;
struct adf_etr_ring_data *ring;
char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
int max_inflights;
u32 ring_num;
int ret;
@ -263,6 +272,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
ring->ring_size = adf_verify_ring_size(msg_size, num_msgs);
ring->head = 0;
ring->tail = 0;
max_inflights = ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size);
ring->threshold = ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD);
atomic_set(ring->inflights, 0);
ret = adf_init_ring(ring);
if (ret)

View File

@ -14,6 +14,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
const char *ring_name, adf_callback_fn callback,
int poll_mode, struct adf_etr_ring_data **ring_ptr);
bool adf_ring_nearly_full(struct adf_etr_ring_data *ring);
int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg);
void adf_remove_ring(struct adf_etr_ring_data *ring);
#endif

View File

@ -22,6 +22,7 @@ struct adf_etr_ring_data {
spinlock_t lock; /* protects ring data struct */
u16 head;
u16 tail;
u32 threshold;
u8 ring_number;
u8 ring_size;
u8 msg_size;

View File

@ -70,6 +70,7 @@ static void adf_dev_stop_async(struct work_struct *work)
container_of(work, struct adf_vf_stop_data, work);
struct adf_accel_dev *accel_dev = stop_data->accel_dev;
adf_dev_restarting_notify(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);

View File

@ -17,7 +17,7 @@
#include <crypto/xts.h>
#include <linux/dma-mapping.h>
#include "adf_accel_devices.h"
#include "adf_transport.h"
#include "qat_algs_send.h"
#include "adf_common_drv.h"
#include "qat_crypto.h"
#include "icp_qat_hw.h"
@ -46,19 +46,6 @@
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;
struct qat_alg_buf {
u32 len;
u32 resrvd;
u64 addr;
} __packed;
struct qat_alg_buf_list {
u64 resrvd;
u32 num_bufs;
u32 num_mapped_bufs;
struct qat_alg_buf bufers[];
} __packed __aligned(64);
/* Common content descriptor */
struct qat_alg_cd {
union {
@ -693,7 +680,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
bl->bufers[i].len, DMA_BIDIRECTIONAL);
dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
kfree(bl);
if (!qat_req->buf.sgl_src_valid)
kfree(bl);
if (blp != blpout) {
/* If out of place operation dma unmap only data */
int bufless = blout->num_bufs - blout->num_mapped_bufs;
@ -704,14 +694,17 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
DMA_BIDIRECTIONAL);
}
dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
kfree(blout);
if (!qat_req->buf.sgl_dst_valid)
kfree(blout);
}
}
static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct scatterlist *sgl,
struct scatterlist *sglout,
struct qat_crypto_request *qat_req)
struct qat_crypto_request *qat_req,
gfp_t flags)
{
struct device *dev = &GET_DEV(inst->accel_dev);
int i, sg_nctr = 0;
@ -721,15 +714,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
dma_addr_t blp = DMA_MAPPING_ERROR;
dma_addr_t bloutp = DMA_MAPPING_ERROR;
struct scatterlist *sg;
size_t sz_out, sz = struct_size(bufl, bufers, n + 1);
size_t sz_out, sz = struct_size(bufl, bufers, n);
int node = dev_to_node(&GET_DEV(inst->accel_dev));
if (unlikely(!n))
return -EINVAL;
bufl = kzalloc_node(sz, GFP_ATOMIC,
dev_to_node(&GET_DEV(inst->accel_dev)));
if (unlikely(!bufl))
return -ENOMEM;
qat_req->buf.sgl_src_valid = false;
qat_req->buf.sgl_dst_valid = false;
if (n > QAT_MAX_BUFF_DESC) {
bufl = kzalloc_node(sz, flags, node);
if (unlikely(!bufl))
return -ENOMEM;
} else {
bufl = &qat_req->buf.sgl_src.sgl_hdr;
memset(bufl, 0, sizeof(struct qat_alg_buf_list));
qat_req->buf.sgl_src_valid = true;
}
for_each_sg(sgl, sg, n, i)
bufl->bufers[i].addr = DMA_MAPPING_ERROR;
@ -760,12 +762,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct qat_alg_buf *bufers;
n = sg_nents(sglout);
sz_out = struct_size(buflout, bufers, n + 1);
sz_out = struct_size(buflout, bufers, n);
sg_nctr = 0;
buflout = kzalloc_node(sz_out, GFP_ATOMIC,
dev_to_node(&GET_DEV(inst->accel_dev)));
if (unlikely(!buflout))
goto err_in;
if (n > QAT_MAX_BUFF_DESC) {
buflout = kzalloc_node(sz_out, flags, node);
if (unlikely(!buflout))
goto err_in;
} else {
buflout = &qat_req->buf.sgl_dst.sgl_hdr;
memset(buflout, 0, sizeof(struct qat_alg_buf_list));
qat_req->buf.sgl_dst_valid = true;
}
bufers = buflout->bufers;
for_each_sg(sglout, sg, n, i)
@ -810,7 +818,9 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
dma_unmap_single(dev, buflout->bufers[i].addr,
buflout->bufers[i].len,
DMA_BIDIRECTIONAL);
kfree(buflout);
if (!qat_req->buf.sgl_dst_valid)
kfree(buflout);
err_in:
if (!dma_mapping_error(dev, blp))
@ -823,7 +833,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
bufl->bufers[i].len,
DMA_BIDIRECTIONAL);
kfree(bufl);
if (!qat_req->buf.sgl_src_valid)
kfree(bufl);
dev_err(dev, "Failed to map buf for dma\n");
return -ENOMEM;
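
Together with the sgl_src_valid/sgl_dst_valid checks in qat_alg_free_bufl() above, this is the "use pre-allocated buffers in datapath" change: scatterlists with up to QAT_MAX_BUFF_DESC entries use a descriptor list embedded in the request, and only larger ones fall back to kzalloc_node() with the request's gfp flags. A standalone sketch of the embedded-or-heap pattern, with illustrative names and an arbitrary embedded size:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define MAX_EMBEDDED_DESC 4	/* plays the role of QAT_MAX_BUFF_DESC */

struct desc {
	unsigned long addr;
	unsigned int len;
};

struct request {
	/* Pre-allocated descriptors for the common, small case. */
	struct desc embedded[MAX_EMBEDDED_DESC];
	bool embedded_valid;
};

/* Pick the embedded array when it is big enough, otherwise allocate. */
static struct desc *get_desc_list(struct request *req, int n)
{
	req->embedded_valid = false;
	if (n > MAX_EMBEDDED_DESC)
		return calloc(n, sizeof(struct desc));

	memset(req->embedded, 0, sizeof(req->embedded));
	req->embedded_valid = true;
	return req->embedded;
}

/* Free only what was actually allocated, as in qat_alg_free_bufl(). */
static void put_desc_list(struct request *req, struct desc *list)
{
	if (!req->embedded_valid)
		free(list);
}

int main(void)
{
	struct request req;

	struct desc *small = get_desc_list(&req, 3);	/* uses req.embedded */
	printf("small list embedded: %d\n", req.embedded_valid);
	put_desc_list(&req, small);

	struct desc *large = get_desc_list(&req, 16);	/* heap allocation */
	printf("large list embedded: %d\n", req.embedded_valid);
	put_desc_list(&req, large);
	return 0;
}
```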
@ -925,8 +936,25 @@ void qat_alg_callback(void *resp)
struct icp_qat_fw_la_resp *qat_resp = resp;
struct qat_crypto_request *qat_req =
(void *)(__force long)qat_resp->opaque_data;
struct qat_instance_backlog *backlog = qat_req->alg_req.backlog;
qat_req->cb(qat_resp, qat_req);
qat_alg_send_backlog(backlog);
}
static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req,
struct qat_crypto_instance *inst,
struct crypto_async_request *base)
{
struct qat_alg_req *alg_req = &qat_req->alg_req;
alg_req->fw_req = (u32 *)&qat_req->req;
alg_req->tx_ring = inst->sym_tx;
alg_req->base = base;
alg_req->backlog = &inst->backlog;
return qat_alg_send_message(alg_req);
}
static int qat_alg_aead_dec(struct aead_request *areq)
@ -939,14 +967,15 @@ static int qat_alg_aead_dec(struct aead_request *areq)
struct icp_qat_fw_la_auth_req_params *auth_param;
struct icp_qat_fw_la_bulk_req *msg;
int digst_size = crypto_aead_authsize(aead_tfm);
int ret, ctr = 0;
gfp_t f = qat_algs_alloc_flags(&areq->base);
int ret;
u32 cipher_len;
cipher_len = areq->cryptlen - digst_size;
if (cipher_len % AES_BLOCK_SIZE != 0)
return -EINVAL;
ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
if (unlikely(ret))
return ret;
@ -965,15 +994,12 @@ static int qat_alg_aead_dec(struct aead_request *areq)
auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param));
auth_param->auth_off = 0;
auth_param->auth_len = areq->assoclen + cipher_param->cipher_length;
do {
ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
return -EBUSY;
}
return -EINPROGRESS;
return ret;
}
static int qat_alg_aead_enc(struct aead_request *areq)
@ -984,14 +1010,15 @@ static int qat_alg_aead_enc(struct aead_request *areq)
struct qat_crypto_request *qat_req = aead_request_ctx(areq);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_auth_req_params *auth_param;
gfp_t f = qat_algs_alloc_flags(&areq->base);
struct icp_qat_fw_la_bulk_req *msg;
u8 *iv = areq->iv;
int ret, ctr = 0;
int ret;
if (areq->cryptlen % AES_BLOCK_SIZE != 0)
return -EINVAL;
ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
if (unlikely(ret))
return ret;
@ -1013,15 +1040,11 @@ static int qat_alg_aead_enc(struct aead_request *areq)
auth_param->auth_off = 0;
auth_param->auth_len = areq->assoclen + areq->cryptlen;
do {
ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
return -EBUSY;
}
return -EINPROGRESS;
return ret;
}
static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx,
@ -1173,13 +1196,14 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
gfp_t f = qat_algs_alloc_flags(&req->base);
struct icp_qat_fw_la_bulk_req *msg;
int ret, ctr = 0;
int ret;
if (req->cryptlen == 0)
return 0;
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
if (unlikely(ret))
return ret;
@ -1198,15 +1222,11 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
qat_alg_set_req_iv(qat_req);
do {
ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
return -EBUSY;
}
return -EINPROGRESS;
return ret;
}
static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req)
@ -1242,13 +1262,14 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
gfp_t f = qat_algs_alloc_flags(&req->base);
struct icp_qat_fw_la_bulk_req *msg;
int ret, ctr = 0;
int ret;
if (req->cryptlen == 0)
return 0;
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
if (unlikely(ret))
return ret;
@ -1268,15 +1289,11 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
qat_alg_set_req_iv(qat_req);
qat_alg_update_iv(qat_req);
do {
ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
return -EBUSY;
}
return -EINPROGRESS;
return ret;
}
static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req)

View File

@ -0,0 +1,86 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2022 Intel Corporation */
#include "adf_transport.h"
#include "qat_algs_send.h"
#include "qat_crypto.h"
#define ADF_MAX_RETRIES 20
static int qat_alg_send_message_retry(struct qat_alg_req *req)
{
int ret = 0, ctr = 0;
do {
ret = adf_send_message(req->tx_ring, req->fw_req);
} while (ret == -EAGAIN && ctr++ < ADF_MAX_RETRIES);
if (ret == -EAGAIN)
return -ENOSPC;
return -EINPROGRESS;
}
void qat_alg_send_backlog(struct qat_instance_backlog *backlog)
{
struct qat_alg_req *req, *tmp;
spin_lock_bh(&backlog->lock);
list_for_each_entry_safe(req, tmp, &backlog->list, list) {
if (adf_send_message(req->tx_ring, req->fw_req)) {
/* The HW ring is full. Do nothing.
* qat_alg_send_backlog() will be invoked again by
* another callback.
*/
break;
}
list_del(&req->list);
req->base->complete(req->base, -EINPROGRESS);
}
spin_unlock_bh(&backlog->lock);
}
static void qat_alg_backlog_req(struct qat_alg_req *req,
struct qat_instance_backlog *backlog)
{
INIT_LIST_HEAD(&req->list);
spin_lock_bh(&backlog->lock);
list_add_tail(&req->list, &backlog->list);
spin_unlock_bh(&backlog->lock);
}
static int qat_alg_send_message_maybacklog(struct qat_alg_req *req)
{
struct qat_instance_backlog *backlog = req->backlog;
struct adf_etr_ring_data *tx_ring = req->tx_ring;
u32 *fw_req = req->fw_req;
/* If any request is already backlogged, then add to backlog list */
if (!list_empty(&backlog->list))
goto enqueue;
/* If ring is nearly full, then add to backlog list */
if (adf_ring_nearly_full(tx_ring))
goto enqueue;
/* If adding request to HW ring fails, then add to backlog list */
if (adf_send_message(tx_ring, fw_req))
goto enqueue;
return -EINPROGRESS;
enqueue:
qat_alg_backlog_req(req, backlog);
return -EBUSY;
}
int qat_alg_send_message(struct qat_alg_req *req)
{
u32 flags = req->base->flags;
if (flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
return qat_alg_send_message_maybacklog(req);
else
return qat_alg_send_message_retry(req);
}
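
This new file is the backlog mechanism referenced throughout the qat_algs.c hunks above: requests whose base flags include CRYPTO_TFM_REQ_MAY_BACKLOG are parked on a per-instance list whenever that list is already non-empty, the ring is nearly full, or the enqueue itself fails, and qat_alg_callback() drains the list as responses complete; requests without the flag keep the bounded-retry path and surface -ENOSPC, which the callers use to free their buffer lists. A standalone, single-threaded model of the decision flow (no locking, no real list, counts only):

```c
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

#define RING_CAPACITY		8
#define NEARLY_FULL_THRESHOLD	6	/* plays the role of the 80% threshold */

static int ring_inflight;
static int backlog_len;

static bool ring_send(void)
{
	if (ring_inflight >= RING_CAPACITY)
		return false;		/* HW ring full */
	ring_inflight++;
	return true;
}

/* MAY_BACKLOG path: park the request instead of failing under congestion. */
static int send_maybacklog(void)
{
	if (backlog_len ||				/* keep ordering */
	    ring_inflight > NEARLY_FULL_THRESHOLD ||	/* nearly full */
	    !ring_send()) {				/* enqueue failed */
		backlog_len++;
		return -EBUSY;
	}
	return -EINPROGRESS;
}

/* Response-callback side: move parked requests onto the ring. */
static void drain_backlog(void)
{
	ring_inflight--;		/* a response just completed */
	while (backlog_len && ring_send())
		backlog_len--;
}

int main(void)
{
	for (int i = 0; i < 10; i++)
		printf("submit %d -> %s\n", i,
		       send_maybacklog() == -EINPROGRESS ?
		       "in progress" : "backlogged");

	drain_backlog();
	printf("after one completion: inflight=%d backlog=%d\n",
	       ring_inflight, backlog_len);
	return 0;
}
```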

View File

@ -0,0 +1,11 @@
/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
/* Copyright(c) 2022 Intel Corporation */
#ifndef QAT_ALGS_SEND_H
#define QAT_ALGS_SEND_H
#include "qat_crypto.h"
int qat_alg_send_message(struct qat_alg_req *req);
void qat_alg_send_backlog(struct qat_instance_backlog *backlog);
#endif

Some files were not shown because too many files have changed in this diff.