mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-29 17:22:07 +00:00
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "Algorithms: - Add AES-NI/AVX/x86_64 implementation of SM4. Drivers: - Add Arm SMCCC TRNG based driver" [ And obviously a lot of random fixes and updates - Linus] * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (84 commits) crypto: sha512 - remove imaginary and mystifying clearing of variables crypto: aesni - xts_crypt() return if walk.nbytes is 0 padata: Remove repeated verbose license text crypto: ccp - Add support for new CCP/PSP device ID crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation crypto: x86/sm4 - export reusable AESNI/AVX functions crypto: rmd320 - remove rmd320 in Makefile crypto: skcipher - in_irq() cleanup crypto: hisilicon - check _PS0 and _PR0 method crypto: hisilicon - change parameter passing of debugfs function crypto: hisilicon - support runtime PM for accelerator device crypto: hisilicon - add runtime PM ops crypto: hisilicon - using 'debugfs_create_file' instead of 'debugfs_create_regset32' crypto: tcrypt - add GCM/CCM mode test for SM4 algorithm crypto: testmgr - Add GCM/CCM mode test of SM4 algorithm crypto: tcrypt - Fix missing return value check crypto: hisilicon/sec - modify the hardware endian configuration crypto: hisilicon/sec - fix the abnormal exiting process crypto: qat - store vf.compatible flag crypto: qat - do not export adf_iov_putmsg() ...
This commit is contained in:
commit
44a7d44411
@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = {
|
||||
.max_size = curve25519_max_size,
|
||||
};
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init arm_curve25519_init(void)
|
||||
{
|
||||
if (elf_hwcap & HWCAP_NEON) {
|
||||
static_branch_enable(&have_neon);
|
||||
@ -122,14 +122,14 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit arm_curve25519_exit(void)
|
||||
{
|
||||
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
|
||||
crypto_unregister_kpp(&curve25519_alg);
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(arm_curve25519_init);
|
||||
module_exit(arm_curve25519_exit);
|
||||
|
||||
MODULE_ALIAS_CRYPTO("curve25519");
|
||||
MODULE_ALIAS_CRYPTO("curve25519-neon");
|
||||
|
@ -51,7 +51,7 @@ config CRYPTO_SM4_ARM64_CE
|
||||
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_SM4
|
||||
select CRYPTO_LIB_SM4
|
||||
|
||||
config CRYPTO_GHASH_ARM64_CE
|
||||
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
|
||||
|
@ -17,12 +17,20 @@ MODULE_LICENSE("GPL v2");
|
||||
|
||||
asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
|
||||
|
||||
static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return sm4_expandkey(ctx, key, key_len);
|
||||
}
|
||||
|
||||
static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
if (!crypto_simd_usable()) {
|
||||
crypto_sm4_encrypt(tfm, out, in);
|
||||
sm4_crypt_block(ctx->rkey_enc, out, in);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
|
||||
@ -32,10 +40,10 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
|
||||
static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
if (!crypto_simd_usable()) {
|
||||
crypto_sm4_decrypt(tfm, out, in);
|
||||
sm4_crypt_block(ctx->rkey_dec, out, in);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
|
||||
@ -49,12 +57,12 @@ static struct crypto_alg sm4_ce_alg = {
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u.cipher = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_setkey = sm4_ce_setkey,
|
||||
.cia_encrypt = sm4_ce_encrypt,
|
||||
.cia_decrypt = sm4_ce_decrypt
|
||||
}
|
||||
|
@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
|
||||
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
|
||||
sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
|
||||
|
||||
quiet_cmd_perlasm = PERLASM $@
|
||||
cmd_perlasm = $(PERL) $< > $@
|
||||
$(obj)/%.S: $(src)/%.pl FORCE
|
||||
|
@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
|
||||
return -EINVAL;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (!walk.nbytes)
|
||||
return err;
|
||||
|
||||
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
|
||||
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
blocks * AES_BLOCK_SIZE, req->iv);
|
||||
req = &subreq;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
tail = 0;
|
||||
}
|
||||
|
589
arch/x86/crypto/sm4-aesni-avx-asm_64.S
Normal file
589
arch/x86/crypto/sm4-aesni-avx-asm_64.S
Normal file
@ -0,0 +1,589 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
|
||||
* https://github.com/mjosaarinen/sm4ni
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define rRIP (%rip)
|
||||
|
||||
#define RX0 %xmm0
|
||||
#define RX1 %xmm1
|
||||
#define MASK_4BIT %xmm2
|
||||
#define RTMP0 %xmm3
|
||||
#define RTMP1 %xmm4
|
||||
#define RTMP2 %xmm5
|
||||
#define RTMP3 %xmm6
|
||||
#define RTMP4 %xmm7
|
||||
|
||||
#define RA0 %xmm8
|
||||
#define RA1 %xmm9
|
||||
#define RA2 %xmm10
|
||||
#define RA3 %xmm11
|
||||
|
||||
#define RB0 %xmm12
|
||||
#define RB1 %xmm13
|
||||
#define RB2 %xmm14
|
||||
#define RB3 %xmm15
|
||||
|
||||
#define RNOT %xmm0
|
||||
#define RBSWAP %xmm1
|
||||
|
||||
|
||||
/* Transpose four 32-bit words between 128-bit vectors. */
|
||||
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
|
||||
vpunpckhdq x1, x0, t2; \
|
||||
vpunpckldq x1, x0, x0; \
|
||||
\
|
||||
vpunpckldq x3, x2, t1; \
|
||||
vpunpckhdq x3, x2, x2; \
|
||||
\
|
||||
vpunpckhqdq t1, x0, x1; \
|
||||
vpunpcklqdq t1, x0, x0; \
|
||||
\
|
||||
vpunpckhqdq x2, t2, x3; \
|
||||
vpunpcklqdq x2, t2, x2;
|
||||
|
||||
/* pre-SubByte transform. */
|
||||
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpand x, mask4bit, tmp0; \
|
||||
vpandn x, mask4bit, x; \
|
||||
vpsrld $4, x, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
|
||||
* 'vaesenclast' instruction.
|
||||
*/
|
||||
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpandn mask4bit, x, tmp0; \
|
||||
vpsrld $4, x, x; \
|
||||
vpand x, mask4bit, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
|
||||
.section .rodata.cst164, "aM", @progbits, 164
|
||||
.align 16
|
||||
|
||||
/*
|
||||
* Following four affine transform look-up tables are from work by
|
||||
* Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
|
||||
*
|
||||
* These allow exposing SM4 S-Box from AES SubByte.
|
||||
*/
|
||||
|
||||
/* pre-SubByte affine transform, from SM4 field to AES field. */
|
||||
.Lpre_tf_lo_s:
|
||||
.quad 0x9197E2E474720701, 0xC7C1B4B222245157
|
||||
.Lpre_tf_hi_s:
|
||||
.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
|
||||
|
||||
/* post-SubByte affine transform, from AES field to SM4 field. */
|
||||
.Lpost_tf_lo_s:
|
||||
.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
|
||||
.Lpost_tf_hi_s:
|
||||
.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
|
||||
|
||||
/* For isolating SubBytes from AESENCLAST, inverse shift row */
|
||||
.Linv_shift_row:
|
||||
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
|
||||
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
||||
|
||||
/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_8:
|
||||
.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
|
||||
.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
|
||||
|
||||
/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_16:
|
||||
.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
|
||||
.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
|
||||
|
||||
/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_24:
|
||||
.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
|
||||
.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
|
||||
|
||||
/* For CTR-mode IV byteswap */
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
/* For input word byte-swap */
|
||||
.Lbswap32_mask:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
|
||||
.align 4
|
||||
/* 4-bit mask */
|
||||
.L0f0f0f0f:
|
||||
.long 0x0f0f0f0f
|
||||
|
||||
|
||||
.text
|
||||
.align 16
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, int nblocks)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx_crypt4)
	/*
	 * Run the 32 SM4 rounds over 1..4 16-byte blocks in parallel, using
	 * whichever round keys the caller supplies.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	%rsi: dst (1..4 blocks)
	 *	%rdx: src (1..4 blocks)
	 *	%rcx: num blocks (1..4)
	 *
	 * %rdi and %rax are modified; every vector register is zeroed by the
	 * vzeroall on exit.
	 */
	FRAME_BEGIN

	/*
	 * Block 0 is always read and replicated into RA1..RA3, so registers
	 * past the requested count still hold defined data.  The vector moves
	 * leave EFLAGS untouched, which is why one cmpq can feed both the jb
	 * and the je that follows the next load.
	 */
	vmovdqu (0 * 16)(%rdx), RA0
	vmovdqa RA0, RA1
	vmovdqa RA0, RA2
	vmovdqa RA0, RA3
	cmpq $2, %rcx
	jb .Lblk4_load_input_done
	vmovdqu (1 * 16)(%rdx), RA1
	je .Lblk4_load_input_done
	vmovdqu (2 * 16)(%rdx), RA2
	cmpq $3, %rcx
	je .Lblk4_load_input_done
	vmovdqu (3 * 16)(%rdx), RA3

.Lblk4_load_input_done:

	/* Byte-swap every 32-bit word of the input blocks. */
	vmovdqa .Lbswap32_mask rRIP, RTMP2
	vpshufb RTMP2, RA0, RA0
	vpshufb RTMP2, RA1, RA1
	vpshufb RTMP2, RA2, RA2
	vpshufb RTMP2, RA3, RA3

	/*
	 * Keep the constants used by every round in registers.  RB0..RB3 carry
	 * no data in this four-block path, so they hold lookup tables here.
	 */
	vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT
	vmovdqa .Lpre_tf_lo_s rRIP, RTMP4
	vmovdqa .Lpre_tf_hi_s rRIP, RB0
	vmovdqa .Lpost_tf_lo_s rRIP, RB1
	vmovdqa .Lpost_tf_hi_s rRIP, RB2
	vmovdqa .Linv_shift_row rRIP, RB3
	vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2
	vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3
	/* After the transpose RAi holds word i of each of the four blocks. */
	transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1)

/* One SM4 round: s0 ^= L(sbox(s1 ^ s2 ^ s3 ^ rk[round])). */
#define ROUND(round, s0, s1, s2, s3)                                \
	vbroadcastss (4*(round))(%rdi), RX0;                        \
	vpxor s1, RX0, RX0;                                         \
	vpxor s2, RX0, RX0;                                         \
	vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */                 \
	                                                            \
	/* sbox, non-linear part */                                 \
	transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0);           \
	vaesenclast MASK_4BIT, RX0, RX0;                            \
	transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0);            \
	                                                            \
	/* linear part */                                           \
	vpshufb RB3, RX0, RTMP0;                                    \
	vpxor RTMP0, s0, s0; /* s0 ^ x */                           \
	vpshufb RTMP2, RX0, RTMP1;                                  \
	vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */               \
	vpshufb RTMP3, RX0, RTMP1;                                  \
	vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */   \
	vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1;            \
	vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */               \
	vpslld $2, RTMP0, RTMP1;                                    \
	vpsrld $30, RTMP0, RTMP0;                                   \
	vpxor RTMP0, s0, s0;                                        \
	/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
	vpxor RTMP1, s0, s0;

	/*
	 * %rax marks the end of the 32 round keys (32 * 4 bytes).  Each pass
	 * performs four rounds and advances %rdi by four keys.
	 */
	leaq (32*4)(%rdi), %rax
.align 16
.Lroundloop_blk4:
	ROUND(0, RA0, RA1, RA2, RA3)
	ROUND(1, RA1, RA2, RA3, RA0)
	ROUND(2, RA2, RA3, RA0, RA1)
	ROUND(3, RA3, RA0, RA1, RA2)
	leaq (4*4)(%rdi), %rdi
	cmpq %rax, %rdi
	jne .Lroundloop_blk4

#undef ROUND

	vmovdqa .Lbswap128_mask rRIP, RTMP2

	/* Back to one block per register, then reverse all 16 bytes of each. */
	transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1)
	vpshufb RTMP2, RA0, RA0
	vpshufb RTMP2, RA1, RA1
	vpshufb RTMP2, RA2, RA2
	vpshufb RTMP2, RA3, RA3

	/* Store only as many blocks as were requested. */
	vmovdqu RA0, (0 * 16)(%rsi)
	cmpq $2, %rcx
	jb .Lblk4_store_output_done
	vmovdqu RA1, (1 * 16)(%rsi)
	je .Lblk4_store_output_done
	vmovdqu RA2, (2 * 16)(%rsi)
	cmpq $3, %rcx
	je .Lblk4_store_output_done
	vmovdqu RA3, (3 * 16)(%rsi)

.Lblk4_store_output_done:
	vzeroall
	FRAME_END
	ret
SYM_FUNC_END(sm4_aesni_avx_crypt4)
|
||||
|
||||
.align 8
|
||||
SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
	/*
	 * Run the 32 SM4 rounds over eight blocks that are already in
	 * registers.  The blocks are handled as two groups of four (RA* and
	 * RB*) whose instructions are interleaved.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
	 *						plaintext blocks
	 * output:
	 *	RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
	 *						ciphertext blocks
	 *
	 * Also written: %rdi, %rax, RX0, RX1, MASK_4BIT and RTMP0..RTMP4.
	 */
	FRAME_BEGIN

	/* Byte-swap every 32-bit word of the eight input blocks. */
	vmovdqa .Lbswap32_mask rRIP, RTMP2
	vpshufb RTMP2, RA0, RA0
	vpshufb RTMP2, RA1, RA1
	vpshufb RTMP2, RA2, RA2
	vpshufb RTMP2, RA3, RA3
	vpshufb RTMP2, RB0, RB0
	vpshufb RTMP2, RB1, RB1
	vpshufb RTMP2, RB2, RB2
	vpshufb RTMP2, RB3, RB3

	vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT
	/* After the transposes RAi/RBi hold word i of each block in a group. */
	transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1)
	transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1)

/*
 * One SM4 round on both groups: the s* registers are the RA group, the r*
 * registers the RB group.  All sixteen vector registers are in use, so the
 * lookup tables are reloaded from memory inside every round.
 */
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3)                \
	vbroadcastss (4*(round))(%rdi), RX0;                        \
	vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;                          \
	vmovdqa .Lpre_tf_hi_s rRIP, RTMP1;                          \
	vmovdqa RX0, RX1;                                           \
	vpxor s1, RX0, RX0;                                         \
	vpxor s2, RX0, RX0;                                         \
	vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */                 \
	vmovdqa .Lpost_tf_lo_s rRIP, RTMP2;                         \
	vmovdqa .Lpost_tf_hi_s rRIP, RTMP3;                         \
	vpxor r1, RX1, RX1;                                         \
	vpxor r2, RX1, RX1;                                         \
	vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */                 \
	                                                            \
	/* sbox, non-linear part */                                 \
	transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0);         \
	transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0);         \
	vmovdqa .Linv_shift_row rRIP, RTMP4;                        \
	vaesenclast MASK_4BIT, RX0, RX0;                            \
	vaesenclast MASK_4BIT, RX1, RX1;                            \
	transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0);        \
	transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0);        \
	                                                            \
	/* linear part */                                           \
	vpshufb RTMP4, RX0, RTMP0;                                  \
	vpxor RTMP0, s0, s0; /* s0 ^ x */                           \
	vpshufb RTMP4, RX1, RTMP2;                                  \
	vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4;                  \
	vpxor RTMP2, r0, r0; /* r0 ^ x */                           \
	vpshufb RTMP4, RX0, RTMP1;                                  \
	vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */               \
	vpshufb RTMP4, RX1, RTMP3;                                  \
	vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4;                 \
	vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */               \
	vpshufb RTMP4, RX0, RTMP1;                                  \
	vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */   \
	vpshufb RTMP4, RX1, RTMP3;                                  \
	vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4;                 \
	vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */   \
	vpshufb RTMP4, RX0, RTMP1;                                  \
	vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */               \
	/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
	vpslld $2, RTMP0, RTMP1;                                    \
	vpsrld $30, RTMP0, RTMP0;                                   \
	vpxor RTMP0, s0, s0;                                        \
	vpxor RTMP1, s0, s0;                                        \
	vpshufb RTMP4, RX1, RTMP3;                                  \
	vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */               \
	/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
	vpslld $2, RTMP2, RTMP3;                                    \
	vpsrld $30, RTMP2, RTMP2;                                   \
	vpxor RTMP2, r0, r0;                                        \
	vpxor RTMP3, r0, r0;

	/*
	 * %rax marks the end of the 32 round keys (32 * 4 bytes).  Each pass
	 * performs four rounds and advances %rdi by four keys.
	 */
	leaq (32*4)(%rdi), %rax
.align 16
.Lroundloop_blk8:
	ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3)
	ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0)
	ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1)
	ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2)
	leaq (4*4)(%rdi), %rdi
	cmpq %rax, %rdi
	jne .Lroundloop_blk8

#undef ROUND

	vmovdqa .Lbswap128_mask rRIP, RTMP2

	/* Back to one block per register, then reverse all 16 bytes of each. */
	transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1)
	transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1)
	vpshufb RTMP2, RA0, RA0
	vpshufb RTMP2, RA1, RA1
	vpshufb RTMP2, RA2, RA2
	vpshufb RTMP2, RA3, RA3
	vpshufb RTMP2, RB0, RB0
	vpshufb RTMP2, RB1, RB1
	vpshufb RTMP2, RB2, RB2
	vpshufb RTMP2, RB3, RB3

	FRAME_END
	ret
SYM_FUNC_END(__sm4_crypt_blk8)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, int nblocks)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx_crypt8)
	/*
	 * Process 1..8 16-byte blocks with the supplied round keys.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	%rsi: dst (1..8 blocks)
	 *	%rdx: src (1..8 blocks)
	 *	%rcx: num blocks (1..8)
	 */

	/*
	 * Fewer than five blocks are handed to sm4_aesni_avx_crypt4 through a
	 * sibling (tail) jump.  The jump has to be taken while the stack frame
	 * is still empty: sm4_aesni_avx_crypt4 sets up and tears down only its
	 * own frame, so if FRAME_BEGIN had already pushed %rbp here, its 'ret'
	 * would pop that saved %rbp as the return address.
	 */
	cmpq $5, %rcx;
	jb sm4_aesni_avx_crypt4;

	/*
	 * FRAME_BEGIN is at most a push plus a register move, neither of
	 * which modifies EFLAGS, so the result of the 'cmpq $5' above is
	 * still valid for the 'je' after the loads below.
	 */
	FRAME_BEGIN

	vmovdqu (0 * 16)(%rdx), RA0;
	vmovdqu (1 * 16)(%rdx), RA1;
	vmovdqu (2 * 16)(%rdx), RA2;
	vmovdqu (3 * 16)(%rdx), RA3;
	vmovdqu (4 * 16)(%rdx), RB0;
	/* Pre-fill the unused lanes with block 4 so they hold defined data. */
	vmovdqa RB0, RB1;
	vmovdqa RB0, RB2;
	vmovdqa RB0, RB3;
	je .Lblk8_load_input_done;		/* exactly 5 blocks */
	vmovdqu (5 * 16)(%rdx), RB1;
	cmpq $7, %rcx;
	jb .Lblk8_load_input_done;		/* exactly 6 blocks */
	vmovdqu (6 * 16)(%rdx), RB2;
	je .Lblk8_load_input_done;		/* exactly 7 blocks */
	vmovdqu (7 * 16)(%rdx), RB3;

.Lblk8_load_input_done:
	call __sm4_crypt_blk8;

	/*
	 * The vector stores do not modify EFLAGS, so this compare is still
	 * valid for the jb/je after the five unconditional stores.
	 */
	cmpq $6, %rcx;
	vmovdqu RA0, (0 * 16)(%rsi);
	vmovdqu RA1, (1 * 16)(%rsi);
	vmovdqu RA2, (2 * 16)(%rsi);
	vmovdqu RA3, (3 * 16)(%rsi);
	vmovdqu RB0, (4 * 16)(%rsi);
	jb .Lblk8_store_output_done;		/* exactly 5 blocks */
	vmovdqu RB1, (5 * 16)(%rsi);
	je .Lblk8_store_output_done;		/* exactly 6 blocks */
	vmovdqu RB2, (6 * 16)(%rsi);
	cmpq $7, %rcx;
	je .Lblk8_store_output_done;		/* exactly 7 blocks */
	vmovdqu RB3, (7 * 16)(%rsi);

.Lblk8_store_output_done:
	vzeroall;
	FRAME_END
	ret;
SYM_FUNC_END(sm4_aesni_avx_crypt8)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
	/*
	 * CTR mode over exactly eight blocks: run counter, counter + 1, ...,
	 * counter + 7 through the cipher, XOR the results with src and write
	 * them to dst.  counter + 8 is written back through %rcx.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	%rsi: dst (8 blocks)
	 *	%rdx: src (8 blocks)
	 *	%rcx: iv (big endian, 128bit)
	 */
	FRAME_BEGIN

	/* The counter as stored is the first cipher input. */
	vmovdqu (%rcx), RA0

	/* Working copy with all 16 bytes reversed, for the arithmetic. */
	vmovdqa .Lbswap128_mask rRIP, RBSWAP
	vpshufb RBSWAP, RA0, RTMP0	/* be => le */

	/*
	 * RNOT and RBSWAP alias RX0 and RX1, which __sm4_crypt_blk8
	 * overwrites; both are only needed before the call.
	 */
	vpcmpeqd RNOT, RNOT, RNOT
	vpsrldq $8, RNOT, RNOT		/* low: -1, high: 0 */

/*
 * 128-bit increment of x.  'minus_one' must be { low: -1, high: 0 }.
 * tmp records whether the low qword is all-ones before the add; that
 * result is moved into the high half and subtracted, which adds the
 * carry to the high qword.
 */
#define inc_le128(x, minus_one, tmp) \
	vpcmpeqq minus_one, x, tmp;  \
	vpsubq minus_one, x, x;      \
	vpslldq $8, tmp, tmp;        \
	vpsubq tmp, x, x;

	/* Build the remaining seven counter blocks. */
	inc_le128(RTMP0, RNOT, RTMP2)	/* +1 */
	vpshufb RBSWAP, RTMP0, RA1
	inc_le128(RTMP0, RNOT, RTMP2)	/* +2 */
	vpshufb RBSWAP, RTMP0, RA2
	inc_le128(RTMP0, RNOT, RTMP2)	/* +3 */
	vpshufb RBSWAP, RTMP0, RA3
	inc_le128(RTMP0, RNOT, RTMP2)	/* +4 */
	vpshufb RBSWAP, RTMP0, RB0
	inc_le128(RTMP0, RNOT, RTMP2)	/* +5 */
	vpshufb RBSWAP, RTMP0, RB1
	inc_le128(RTMP0, RNOT, RTMP2)	/* +6 */
	vpshufb RBSWAP, RTMP0, RB2
	inc_le128(RTMP0, RNOT, RTMP2)	/* +7 */
	vpshufb RBSWAP, RTMP0, RB3
	inc_le128(RTMP0, RNOT, RTMP2)	/* +8 */
	vpshufb RBSWAP, RTMP0, RTMP1

	/* Hand counter + 8 back to the caller as the next IV. */
	vmovdqu RTMP1, (%rcx)

	call __sm4_crypt_blk8

	/* Keystream XOR src; every src read precedes the first dst write. */
	vpxor (0 * 16)(%rdx), RA0, RA0
	vpxor (1 * 16)(%rdx), RA1, RA1
	vpxor (2 * 16)(%rdx), RA2, RA2
	vpxor (3 * 16)(%rdx), RA3, RA3
	vpxor (4 * 16)(%rdx), RB0, RB0
	vpxor (5 * 16)(%rdx), RB1, RB1
	vpxor (6 * 16)(%rdx), RB2, RB2
	vpxor (7 * 16)(%rdx), RB3, RB3

	vmovdqu RA0, (0 * 16)(%rsi)
	vmovdqu RA1, (1 * 16)(%rsi)
	vmovdqu RA2, (2 * 16)(%rsi)
	vmovdqu RA3, (3 * 16)(%rsi)
	vmovdqu RB0, (4 * 16)(%rsi)
	vmovdqu RB1, (5 * 16)(%rsi)
	vmovdqu RB2, (6 * 16)(%rsi)
	vmovdqu RB3, (7 * 16)(%rsi)

	vzeroall
	FRAME_END
	ret
SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
	/*
	 * CBC decryption of exactly eight blocks: each src block goes through
	 * the cipher and is then XORed with the preceding src block (the IV
	 * for block 0).  src block 7 is written back through %rcx as the next
	 * IV.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	%rsi: dst (8 blocks)
	 *	%rdx: src (8 blocks)
	 *	%rcx: iv
	 */
	FRAME_BEGIN

	vmovdqu (0 * 16)(%rdx), RA0
	vmovdqu (1 * 16)(%rdx), RA1
	vmovdqu (2 * 16)(%rdx), RA2
	vmovdqu (3 * 16)(%rdx), RA3
	vmovdqu (4 * 16)(%rdx), RB0
	vmovdqu (5 * 16)(%rdx), RB1
	vmovdqu (6 * 16)(%rdx), RB2
	vmovdqu (7 * 16)(%rdx), RB3

	call __sm4_crypt_blk8

	/*
	 * All chaining inputs are read from src, and the old IV is consumed,
	 * before anything is written to the IV or to dst -- presumably so
	 * that dst may alias src.
	 */
	vmovdqu (7 * 16)(%rdx), RNOT
	vpxor (%rcx), RA0, RA0
	vpxor (0 * 16)(%rdx), RA1, RA1
	vpxor (1 * 16)(%rdx), RA2, RA2
	vpxor (2 * 16)(%rdx), RA3, RA3
	vpxor (3 * 16)(%rdx), RB0, RB0
	vpxor (4 * 16)(%rdx), RB1, RB1
	vpxor (5 * 16)(%rdx), RB2, RB2
	vpxor (6 * 16)(%rdx), RB3, RB3
	vmovdqu RNOT, (%rcx)		/* store new IV */

	vmovdqu RA0, (0 * 16)(%rsi)
	vmovdqu RA1, (1 * 16)(%rsi)
	vmovdqu RA2, (2 * 16)(%rsi)
	vmovdqu RA3, (3 * 16)(%rsi)
	vmovdqu RB0, (4 * 16)(%rsi)
	vmovdqu RB1, (5 * 16)(%rsi)
	vmovdqu RB2, (6 * 16)(%rsi)
	vmovdqu RB3, (7 * 16)(%rsi)

	vzeroall
	FRAME_END
	ret
SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
	/*
	 * CFB decryption of exactly eight blocks: the IV and src blocks 0..6
	 * go through the cipher, and the results are XORed with src blocks
	 * 0..7.  src block 7 is written back through %rcx as the next IV.
	 *
	 * input:
	 *	%rdi: round key array, CTX
	 *	%rsi: dst (8 blocks)
	 *	%rdx: src (8 blocks)
	 *	%rcx: iv
	 */
	FRAME_BEGIN

	/* Cipher inputs: the IV followed by src blocks 0..6. */
	vmovdqu (%rcx), RA0
	vmovdqu (0 * 16)(%rdx), RA1
	vmovdqu (1 * 16)(%rdx), RA2
	vmovdqu (2 * 16)(%rdx), RA3
	vmovdqu (3 * 16)(%rdx), RB0
	vmovdqu (4 * 16)(%rdx), RB1
	vmovdqu (5 * 16)(%rdx), RB2
	vmovdqu (6 * 16)(%rdx), RB3

	/*
	 * The next IV is src block 7.  RNOT aliases RX0, which
	 * __sm4_crypt_blk8 overwrites, so it is stored before the call.
	 */
	vmovdqu (7 * 16)(%rdx), RNOT
	vmovdqu RNOT, (%rcx)

	call __sm4_crypt_blk8

	/* Cipher output XOR src; every src read precedes the first dst write. */
	vpxor (0 * 16)(%rdx), RA0, RA0
	vpxor (1 * 16)(%rdx), RA1, RA1
	vpxor (2 * 16)(%rdx), RA2, RA2
	vpxor (3 * 16)(%rdx), RA3, RA3
	vpxor (4 * 16)(%rdx), RB0, RB0
	vpxor (5 * 16)(%rdx), RB1, RB1
	vpxor (6 * 16)(%rdx), RB2, RB2
	vpxor (7 * 16)(%rdx), RB3, RB3

	vmovdqu RA0, (0 * 16)(%rsi)
	vmovdqu RA1, (1 * 16)(%rsi)
	vmovdqu RA2, (2 * 16)(%rsi)
	vmovdqu RA3, (3 * 16)(%rsi)
	vmovdqu RB0, (4 * 16)(%rsi)
	vmovdqu RB1, (5 * 16)(%rsi)
	vmovdqu RB2, (6 * 16)(%rsi)
	vmovdqu RB3, (7 * 16)(%rsi)

	vzeroall
	FRAME_END
	ret
SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
|
497
arch/x86/crypto/sm4-aesni-avx2-asm_64.S
Normal file
497
arch/x86/crypto/sm4-aesni-avx2-asm_64.S
Normal file
@ -0,0 +1,497 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
* Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
|
||||
* https://github.com/mjosaarinen/sm4ni
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define rRIP (%rip)
|
||||
|
||||
/* vector registers */
|
||||
#define RX0 %ymm0
|
||||
#define RX1 %ymm1
|
||||
#define MASK_4BIT %ymm2
|
||||
#define RTMP0 %ymm3
|
||||
#define RTMP1 %ymm4
|
||||
#define RTMP2 %ymm5
|
||||
#define RTMP3 %ymm6
|
||||
#define RTMP4 %ymm7
|
||||
|
||||
#define RA0 %ymm8
|
||||
#define RA1 %ymm9
|
||||
#define RA2 %ymm10
|
||||
#define RA3 %ymm11
|
||||
|
||||
#define RB0 %ymm12
|
||||
#define RB1 %ymm13
|
||||
#define RB2 %ymm14
|
||||
#define RB3 %ymm15
|
||||
|
||||
#define RNOT %ymm0
|
||||
#define RBSWAP %ymm1
|
||||
|
||||
#define RX0x %xmm0
|
||||
#define RX1x %xmm1
|
||||
#define MASK_4BITx %xmm2
|
||||
|
||||
#define RNOTx %xmm0
|
||||
#define RBSWAPx %xmm1
|
||||
|
||||
#define RTMP0x %xmm3
|
||||
#define RTMP1x %xmm4
|
||||
#define RTMP2x %xmm5
|
||||
#define RTMP3x %xmm6
|
||||
#define RTMP4x %xmm7
|
||||
|
||||
|
||||
/* helper macros */
|
||||
|
||||
/* Transpose four 32-bit words between 128-bit vector lanes. */
|
||||
#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
|
||||
vpunpckhdq x1, x0, t2; \
|
||||
vpunpckldq x1, x0, x0; \
|
||||
\
|
||||
vpunpckldq x3, x2, t1; \
|
||||
vpunpckhdq x3, x2, x2; \
|
||||
\
|
||||
vpunpckhqdq t1, x0, x1; \
|
||||
vpunpcklqdq t1, x0, x0; \
|
||||
\
|
||||
vpunpckhqdq x2, t2, x3; \
|
||||
vpunpcklqdq x2, t2, x2;
|
||||
|
||||
/* pre-SubByte transform. */
|
||||
#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpand x, mask4bit, tmp0; \
|
||||
vpandn x, mask4bit, x; \
|
||||
vpsrld $4, x, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
|
||||
* 'vaesenclast' instruction. */
|
||||
#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
|
||||
vpandn mask4bit, x, tmp0; \
|
||||
vpsrld $4, x, x; \
|
||||
vpand x, mask4bit, x; \
|
||||
\
|
||||
vpshufb tmp0, lo_t, tmp0; \
|
||||
vpshufb x, hi_t, x; \
|
||||
vpxor tmp0, x, x;
|
||||
|
||||
|
||||
.section .rodata.cst164, "aM", @progbits, 164
|
||||
.align 16
|
||||
|
||||
/*
|
||||
* Following four affine transform look-up tables are from work by
|
||||
* Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
|
||||
*
|
||||
* These allow exposing SM4 S-Box from AES SubByte.
|
||||
*/
|
||||
|
||||
/* pre-SubByte affine transform, from SM4 field to AES field. */
|
||||
.Lpre_tf_lo_s:
|
||||
.quad 0x9197E2E474720701, 0xC7C1B4B222245157
|
||||
.Lpre_tf_hi_s:
|
||||
.quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
|
||||
|
||||
/* post-SubByte affine transform, from AES field to SM4 field. */
|
||||
.Lpost_tf_lo_s:
|
||||
.quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
|
||||
.Lpost_tf_hi_s:
|
||||
.quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
|
||||
|
||||
/* For isolating SubBytes from AESENCLAST, inverse shift row */
|
||||
.Linv_shift_row:
|
||||
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
|
||||
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
||||
|
||||
/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_8:
|
||||
.byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
|
||||
.byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
|
||||
|
||||
/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_16:
|
||||
.byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
|
||||
.byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
|
||||
|
||||
/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
|
||||
.Linv_shift_row_rol_24:
|
||||
.byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
|
||||
.byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
|
||||
|
||||
/* For CTR-mode IV byteswap */
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
/* For input word byte-swap */
|
||||
.Lbswap32_mask:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
|
||||
.align 4
|
||||
/* 4-bit mask */
|
||||
.L0f0f0f0f:
|
||||
.long 0x0f0f0f0f
|
||||
|
||||
.text
|
||||
.align 16
|
||||
|
||||
.align 8
|
||||
SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
|
||||
* plaintext blocks
|
||||
* output:
|
||||
* RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
|
||||
* ciphertext blocks
|
||||
* side effects visible in the code below:
|
||||
* %rax is overwritten and %rdi is advanced by 32*4 bytes;
|
||||
* RTMP0-RTMP4, RX0, RX1 and MASK_4BIT are used as scratch
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
/* Byte-swap every 32-bit word of the eight input registers */
vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
|
||||
vpshufb RTMP2, RA0, RA0;
|
||||
vpshufb RTMP2, RA1, RA1;
|
||||
vpshufb RTMP2, RA2, RA2;
|
||||
vpshufb RTMP2, RA3, RA3;
|
||||
vpshufb RTMP2, RB0, RB0;
|
||||
vpshufb RTMP2, RB1, RB1;
|
||||
vpshufb RTMP2, RB2, RB2;
|
||||
vpshufb RTMP2, RB3, RB3;
|
||||
|
||||
/* Broadcast the 0x0f0f0f0f nibble mask handed to transform_pre/post */
vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
|
||||
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
|
||||
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
|
||||
|
||||
/*
 * ROUND(round, s0..s3, r0..r3): one round applied to two register groups
 * at once, using the 32-bit round key at offset 4*round from %rdi.
 * For each group: x = sbox(s1 ^ s2 ^ s3 ^ rk), then
 * s0 ^= x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24)
 * (likewise for r0..r3), as spelled out by the inline comments.
 */
#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
|
||||
vpbroadcastd (4*(round))(%rdi), RX0; \
|
||||
vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
|
||||
vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
|
||||
vmovdqa RX0, RX1; \
|
||||
vpxor s1, RX0, RX0; \
|
||||
vpxor s2, RX0, RX0; \
|
||||
vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
|
||||
vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
|
||||
vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
|
||||
vpxor r1, RX1, RX1; \
|
||||
vpxor r2, RX1, RX1; \
|
||||
vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
|
||||
\
|
||||
/* sbox, non-linear part */ \
|
||||
transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
|
||||
transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
|
||||
vextracti128 $1, RX0, RTMP4x; \
|
||||
vextracti128 $1, RX1, RTMP0x; \
|
||||
vaesenclast MASK_4BITx, RX0x, RX0x; \
|
||||
vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
|
||||
vaesenclast MASK_4BITx, RX1x, RX1x; \
|
||||
vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
|
||||
vinserti128 $1, RTMP4x, RX0, RX0; \
|
||||
vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
|
||||
vinserti128 $1, RTMP0x, RX1, RX1; \
|
||||
transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
|
||||
transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
|
||||
\
|
||||
/* linear part */ \
|
||||
vpshufb RTMP4, RX0, RTMP0; \
|
||||
vpxor RTMP0, s0, s0; /* s0 ^ x */ \
|
||||
vpshufb RTMP4, RX1, RTMP2; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
|
||||
vpxor RTMP2, r0, r0; /* r0 ^ x */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
|
||||
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
|
||||
vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
|
||||
vpshufb RTMP4, RX0, RTMP1; \
|
||||
vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
|
||||
vpslld $2, RTMP0, RTMP1; \
|
||||
vpsrld $30, RTMP0, RTMP0; \
|
||||
vpxor RTMP0, s0, s0; \
|
||||
/* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
|
||||
vpxor RTMP1, s0, s0; \
|
||||
vpshufb RTMP4, RX1, RTMP3; \
|
||||
vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
|
||||
vpslld $2, RTMP2, RTMP3; \
|
||||
vpsrld $30, RTMP2, RTMP2; \
|
||||
vpxor RTMP2, r0, r0; \
|
||||
/* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
|
||||
vpxor RTMP3, r0, r0;
|
||||
|
||||
/* %rax = end marker: %rdi + 32 round key words of 4 bytes each */
leaq (32*4)(%rdi), %rax;
|
||||
.align 16
|
||||
/* Four rounds per pass; %rdi steps by four key words until it hits %rax */
.Lroundloop_blk8:
|
||||
ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
|
||||
ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
|
||||
ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
|
||||
ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
|
||||
leaq (4*4)(%rdi), %rdi;
|
||||
cmpq %rax, %rdi;
|
||||
jne .Lroundloop_blk8;
|
||||
|
||||
#undef ROUND
|
||||
|
||||
/* Transpose back, then shuffle every 128-bit lane with .Lbswap128_mask */
vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
|
||||
|
||||
transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
|
||||
transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
|
||||
vpshufb RTMP2, RA0, RA0;
|
||||
vpshufb RTMP2, RA1, RA1;
|
||||
vpshufb RTMP2, RA2, RA2;
|
||||
vpshufb RTMP2, RA3, RA3;
|
||||
vpshufb RTMP2, RB0, RB0;
|
||||
vpshufb RTMP2, RB1, RB1;
|
||||
vpshufb RTMP2, RB2, RB2;
|
||||
vpshufb RTMP2, RB3, RB3;
|
||||
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(__sm4_crypt_blk16)
|
||||
|
||||
/*
 * inc_le128(x, minus_one, tmp): add 1 to the little-endian 128-bit value in
 * each 128-bit lane of x. The callers in this file pass minus_one with -1 in
 * the low qword and 0 in the high qword of every lane. tmp is clobbered.
 *   vpcmpeqq: low qword of tmp = all-ones iff the low qword of x is -1
 *             (the high-qword result is discarded by the shift below)
 *   vpsubq:   low qword of x -= -1, i.e. += 1
 *   vpslldq:  move the "low qword wrapped" flag into the high qword
 *   vpsubq:   high qword of x += 1 when the low qword wrapped (carry)
 */
#define inc_le128(x, minus_one, tmp) \
|
||||
vpcmpeqq minus_one, x, tmp; \
|
||||
vpsubq minus_one, x, x; \
|
||||
vpslldq $8, tmp, tmp; \
|
||||
vpsubq tmp, x, x;
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv (big endian, 128bit)
|
||||
* on return the iv buffer holds the counter advanced by 16
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
/* %rax = low 64 bits of the big-endian counter, converted to host order */
movq 8(%rcx), %rax;
|
||||
bswapq %rax;
|
||||
|
||||
vzeroupper;
|
||||
|
||||
/* RNOT = -1:0 per lane (inc_le128 operand), RTMP2 = -2:0 per lane */
vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
|
||||
vpcmpeqd RNOT, RNOT, RNOT;
|
||||
vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
|
||||
vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
|
||||
|
||||
/* load IV and byteswap */
|
||||
vmovdqu (%rcx), RTMP4x;
|
||||
vpshufb RTMP3x, RTMP4x, RTMP4x;
|
||||
vmovdqa RTMP4x, RTMP0x;
|
||||
inc_le128(RTMP4x, RNOTx, RTMP1x);
|
||||
vinserti128 $1, RTMP4x, RTMP0, RTMP0;
|
||||
vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
|
||||
|
||||
/* check need for handling 64-bit overflow and carry */
|
||||
cmpq $(0xffffffffffffffff - 16), %rax;
|
||||
ja .Lhandle_ctr_carry;
|
||||
|
||||
/* Fast path: the low qword cannot wrap, so subtracting -2 per lane is enough */
/* construct IVs */
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
|
||||
vpshufb RTMP3, RTMP0, RA1;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
|
||||
vpshufb RTMP3, RTMP0, RA2;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
|
||||
vpshufb RTMP3, RTMP0, RA3;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
|
||||
vpshufb RTMP3, RTMP0, RB0;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
|
||||
vpshufb RTMP3, RTMP0, RB1;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
|
||||
vpshufb RTMP3, RTMP0, RB2;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
|
||||
vpshufb RTMP3, RTMP0, RB3;
|
||||
vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
|
||||
vpshufb RTMP3x, RTMP0x, RTMP0x;
|
||||
|
||||
jmp .Lctr_carry_done;
|
||||
|
||||
/* Slow path: step with inc_le128 twice per register so carries propagate */
.Lhandle_ctr_carry:
|
||||
/* construct IVs */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
|
||||
inc_le128(RTMP0, RNOT, RTMP1);
|
||||
vextracti128 $1, RTMP0, RTMP0x;
|
||||
vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
|
||||
|
||||
.align 4
|
||||
.Lctr_carry_done:
|
||||
/* store new IV */
|
||||
vmovdqu RTMP0x, (%rcx);
|
||||
|
||||
/* Run the sixteen counter blocks in RA0..RB3 through the cipher */
call __sm4_crypt_blk16;
|
||||
|
||||
/* XOR the result with the sixteen src blocks ... */
vpxor (0 * 32)(%rdx), RA0, RA0;
|
||||
vpxor (1 * 32)(%rdx), RA1, RA1;
|
||||
vpxor (2 * 32)(%rdx), RA2, RA2;
|
||||
vpxor (3 * 32)(%rdx), RA3, RA3;
|
||||
vpxor (4 * 32)(%rdx), RB0, RB0;
|
||||
vpxor (5 * 32)(%rdx), RB1, RB1;
|
||||
vpxor (6 * 32)(%rdx), RB2, RB2;
|
||||
vpxor (7 * 32)(%rdx), RB3, RB3;
|
||||
|
||||
/* ... and store to dst */
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
/* Zero all vector registers before returning */
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv
|
||||
* on return the iv buffer holds the last (16th) src block
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
vzeroupper;
|
||||
|
||||
/* Load the sixteen src blocks, two per register */
vmovdqu (0 * 32)(%rdx), RA0;
|
||||
vmovdqu (1 * 32)(%rdx), RA1;
|
||||
vmovdqu (2 * 32)(%rdx), RA2;
|
||||
vmovdqu (3 * 32)(%rdx), RA3;
|
||||
vmovdqu (4 * 32)(%rdx), RB0;
|
||||
vmovdqu (5 * 32)(%rdx), RB1;
|
||||
vmovdqu (6 * 32)(%rdx), RB2;
|
||||
vmovdqu (7 * 32)(%rdx), RB3;
|
||||
|
||||
call __sm4_crypt_blk16;
|
||||
|
||||
/*
 * Chain: output block 0 is XORed with the IV, output block i with src
 * block i-1. RNOT = { IV, src block 0 }; the +16 offsets below select
 * src blocks 1..14 shifted by one block against the outputs.
 */
vmovdqu (%rcx), RNOTx;
|
||||
vinserti128 $1, (%rdx), RNOT, RNOT;
|
||||
vpxor RNOT, RA0, RA0;
|
||||
vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
|
||||
vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
|
||||
vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
|
||||
vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
|
||||
vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
|
||||
vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
|
||||
vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
|
||||
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
|
||||
vmovdqu RNOTx, (%rcx); /* store new IV */
|
||||
|
||||
/* All src reads are complete at this point; store the result to dst */
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
/* Zero all vector registers before returning */
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
|
||||
|
||||
/*
|
||||
* void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
|
||||
* const u8 *src, u8 *iv)
|
||||
*/
|
||||
.align 8
|
||||
SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
|
||||
/* input:
|
||||
* %rdi: round key array, CTX
|
||||
* %rsi: dst (16 blocks)
|
||||
* %rdx: src (16 blocks)
|
||||
* %rcx: iv
|
||||
* on return the iv buffer holds the last (16th) src block
|
||||
*/
|
||||
FRAME_BEGIN
|
||||
|
||||
vzeroupper;
|
||||
|
||||
/* Load input */
|
||||
/* RA0 = { IV, src block 0 }; RA1..RB3 = src blocks 1..14 */
vmovdqu (%rcx), RNOTx;
|
||||
vinserti128 $1, (%rdx), RNOT, RA0;
|
||||
vmovdqu (0 * 32 + 16)(%rdx), RA1;
|
||||
vmovdqu (1 * 32 + 16)(%rdx), RA2;
|
||||
vmovdqu (2 * 32 + 16)(%rdx), RA3;
|
||||
vmovdqu (3 * 32 + 16)(%rdx), RB0;
|
||||
vmovdqu (4 * 32 + 16)(%rdx), RB1;
|
||||
vmovdqu (5 * 32 + 16)(%rdx), RB2;
|
||||
vmovdqu (6 * 32 + 16)(%rdx), RB3;
|
||||
|
||||
/* Update IV */
|
||||
vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
|
||||
vmovdqu RNOTx, (%rcx);
|
||||
|
||||
call __sm4_crypt_blk16;
|
||||
|
||||
/* XOR the cipher output with src blocks 0..15 ... */
vpxor (0 * 32)(%rdx), RA0, RA0;
|
||||
vpxor (1 * 32)(%rdx), RA1, RA1;
|
||||
vpxor (2 * 32)(%rdx), RA2, RA2;
|
||||
vpxor (3 * 32)(%rdx), RA3, RA3;
|
||||
vpxor (4 * 32)(%rdx), RB0, RB0;
|
||||
vpxor (5 * 32)(%rdx), RB1, RB1;
|
||||
vpxor (6 * 32)(%rdx), RB2, RB2;
|
||||
vpxor (7 * 32)(%rdx), RB3, RB3;
|
||||
|
||||
/* ... and store to dst */
vmovdqu RA0, (0 * 32)(%rsi);
|
||||
vmovdqu RA1, (1 * 32)(%rsi);
|
||||
vmovdqu RA2, (2 * 32)(%rsi);
|
||||
vmovdqu RA3, (3 * 32)(%rsi);
|
||||
vmovdqu RB0, (4 * 32)(%rsi);
|
||||
vmovdqu RB1, (5 * 32)(%rsi);
|
||||
vmovdqu RB2, (6 * 32)(%rsi);
|
||||
vmovdqu RB3, (7 * 32)(%rsi);
|
||||
|
||||
/* Zero all vector registers before returning */
vzeroall;
|
||||
FRAME_END
|
||||
ret;
|
||||
SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
|
24
arch/x86/crypto/sm4-avx.h
Normal file
24
arch/x86/crypto/sm4-avx.h
Normal file
@ -0,0 +1,24 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef ASM_X86_SM4_AVX_H
#define ASM_X86_SM4_AVX_H

#include <linux/types.h>
#include <crypto/sm4.h>

/*
 * Only pointers to the request are used below, so a forward declaration is
 * sufficient. Without it, a translation unit that includes this header
 * before the skcipher headers would get a prototype-scoped struct type that
 * is incompatible with the real one.
 */
struct skcipher_request;

/*
 * Signature of the bulk assembly routines handed to the helpers below:
 * round keys, destination, source and the IV/counter buffer, which the
 * routine updates in place.
 */
typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);

/* ECB: no IV, shared by the AVX and AVX2 drivers. */
int sm4_avx_ecb_encrypt(struct skcipher_request *req);
int sm4_avx_ecb_decrypt(struct skcipher_request *req);

/*
 * CBC/CFB/CTR: the decrypt/crypt helpers take the bulk routine @func and
 * @bsize, the number of bytes @func consumes per call.
 */
int sm4_cbc_encrypt(struct skcipher_request *req);
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

int sm4_cfb_encrypt(struct skcipher_request *req);
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

int sm4_avx_ctr_crypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func);

#endif
|
169
arch/x86/crypto/sm4_aesni_avx2_glue.c
Normal file
169
arch/x86/crypto/sm4_aesni_avx2_glue.c
Normal file
@ -0,0 +1,169 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (c) 2021, Alibaba Group.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/sm4.h>
|
||||
#include "sm4-avx.h"
|
||||
|
||||
#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
|
||||
|
||||
asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
|
||||
/*
 * skcipher ->setkey hook: expand @key (@key_len bytes) into the SM4 context
 * stored in @tfm. Returns whatever sm4_expandkey() returns.
 */
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int key_len)
{
	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
}
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_cbc_dec_blk16);
|
||||
}
|
||||
|
||||
|
||||
static int cfb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_cfb_dec_blk16);
|
||||
}
|
||||
|
||||
static int ctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
|
||||
sm4_aesni_avx2_ctr_enc_blk16);
|
||||
}
|
||||
|
||||
static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
|
||||
{
|
||||
.base = {
|
||||
.cra_name = "__ecb(sm4)",
|
||||
.cra_driver_name = "__ecb-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_avx_ecb_encrypt,
|
||||
.decrypt = sm4_avx_ecb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cbc(sm4)",
|
||||
.cra_driver_name = "__cbc-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cfb(sm4)",
|
||||
.cra_driver_name = "__cfb-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cfb_encrypt,
|
||||
.decrypt = cfb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(sm4)",
|
||||
.cra_driver_name = "__ctr-sm4-aesni-avx2",
|
||||
.cra_priority = 500,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 16 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
}
|
||||
};
|
||||
|
||||
static struct simd_skcipher_alg *
|
||||
simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
|
||||
|
||||
/*
 * Module init: refuse to load (-ENODEV) unless the CPU reports AVX, AVX2,
 * AES-NI and OSXSAVE and the SSE/YMM xfeatures are available, then register
 * the algorithms together with their SIMD wrappers.
 */
static int __init sm4_init(void)
{
	const char *feature_name;

	if (!(boot_cpu_has(X86_FEATURE_AVX) &&
	      boot_cpu_has(X86_FEATURE_AVX2) &&
	      boot_cpu_has(X86_FEATURE_AES) &&
	      boot_cpu_has(X86_FEATURE_OSXSAVE))) {
		pr_info("AVX2 or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
				ARRAY_SIZE(sm4_aesni_avx2_skciphers),
				simd_sm4_aesni_avx2_skciphers);
}
|
||||
|
||||
/* Module exit: unregister everything sm4_init() registered. */
static void __exit sm4_exit(void)
{
	int count = ARRAY_SIZE(sm4_aesni_avx2_skciphers);

	simd_unregister_skciphers(sm4_aesni_avx2_skciphers, count,
				  simd_sm4_aesni_avx2_skciphers);
}
|
||||
|
||||
module_init(sm4_init);
|
||||
module_exit(sm4_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
|
||||
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
|
||||
MODULE_ALIAS_CRYPTO("sm4");
|
||||
MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
|
487
arch/x86/crypto/sm4_aesni_avx_glue.c
Normal file
487
arch/x86/crypto/sm4_aesni_avx_glue.c
Normal file
@ -0,0 +1,487 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* SM4 Cipher Algorithm, AES-NI/AVX optimized.
|
||||
* as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (c) 2021, Alibaba Group.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/sm4.h>
|
||||
#include "sm4-avx.h"
|
||||
|
||||
#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
|
||||
|
||||
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
|
||||
const u8 *src, int nblocks);
|
||||
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, int nblocks);
|
||||
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
|
||||
const u8 *src, u8 *iv);
|
||||
|
||||
/*
 * skcipher ->setkey hook: expand @key (@key_len bytes) into the SM4 context
 * stored in @tfm. Returns whatever sm4_expandkey() returns.
 */
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int key_len)
{
	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
}
|
||||
|
||||
static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
kernel_fpu_begin();
|
||||
while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
|
||||
sm4_aesni_avx_crypt8(rkey, dst, src, 8);
|
||||
dst += SM4_CRYPT8_BLOCK_SIZE;
|
||||
src += SM4_CRYPT8_BLOCK_SIZE;
|
||||
nbytes -= SM4_CRYPT8_BLOCK_SIZE;
|
||||
}
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
unsigned int nblocks = min(nbytes >> 4, 4u);
|
||||
sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
|
||||
dst += nblocks * SM4_BLOCK_SIZE;
|
||||
src += nblocks * SM4_BLOCK_SIZE;
|
||||
nbytes -= nblocks * SM4_BLOCK_SIZE;
|
||||
}
|
||||
kernel_fpu_end();
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int sm4_avx_ecb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_do_crypt(req, ctx->rkey_enc);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
|
||||
|
||||
int sm4_avx_ecb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_do_crypt(req, ctx->rkey_dec);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
|
||||
|
||||
int sm4_cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
const u8 *iv = walk.iv;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
|
||||
sm4_crypt_block(ctx->rkey_enc, dst, dst);
|
||||
iv = dst;
|
||||
src += SM4_BLOCK_SIZE;
|
||||
dst += SM4_BLOCK_SIZE;
|
||||
nbytes -= SM4_BLOCK_SIZE;
|
||||
}
|
||||
if (iv != walk.iv)
|
||||
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
|
||||
|
||||
/*
 * CBC decryption shared by the AVX and AVX2 drivers.
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes @func consumes per call
 * @func:  bulk routine that decrypts @bsize bytes and updates the IV
 *
 * Chunks of at least @bsize bytes go to @func. The remaining whole blocks
 * (at most eight per pass) are decrypted with sm4_aesni_avx_crypt8() into a
 * stack buffer and then chained with the preceding ciphertext block.
 *
 * Return: the last status from skcipher_walk_virt()/skcipher_walk_done().
 */
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_dec, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			u8 iv[SM4_BLOCK_SIZE];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			unsigned int i;

			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
						src, nblocks);

			/*
			 * Save the last ciphertext block as the next IV
			 * before anything is written to dst, which may alias
			 * src.
			 */
			memcpy(iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
			       SM4_BLOCK_SIZE);

			/*
			 * Chain from the last block down to block 1, so that
			 * src block i-1 is still intact when block i is
			 * written. Index-based addressing keeps src and dst
			 * inside their buffers even when nblocks == 1.
			 */
			for (i = nblocks - 1; i > 0; i--)
				crypto_xor_cpy(dst + i * SM4_BLOCK_SIZE,
					       src + (i - 1) * SM4_BLOCK_SIZE,
					       &keystream[i * SM4_BLOCK_SIZE],
					       SM4_BLOCK_SIZE);

			/* Block 0 chains with the incoming IV. */
			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);

			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_cbc_dec_blk8);
|
||||
}
|
||||
|
||||
int sm4_cfb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
u8 keystream[SM4_BLOCK_SIZE];
|
||||
const u8 *iv = walk.iv;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
|
||||
while (nbytes >= SM4_BLOCK_SIZE) {
|
||||
sm4_crypt_block(ctx->rkey_enc, keystream, iv);
|
||||
crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
|
||||
iv = dst;
|
||||
src += SM4_BLOCK_SIZE;
|
||||
dst += SM4_BLOCK_SIZE;
|
||||
nbytes -= SM4_BLOCK_SIZE;
|
||||
}
|
||||
if (iv != walk.iv)
|
||||
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
|
||||
|
||||
/* tail */
|
||||
if (walk.nbytes == walk.total && nbytes > 0) {
|
||||
sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
|
||||
crypto_xor_cpy(dst, src, keystream, nbytes);
|
||||
nbytes = 0;
|
||||
}
|
||||
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
|
||||
|
||||
/*
 * CFB decryption shared by the AVX and AVX2 drivers.
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes @func consumes per call
 * @func:  bulk routine that decrypts @bsize bytes and updates the IV
 *
 * Chunks of at least @bsize bytes go to @func. The remaining whole blocks
 * (at most eight per pass) are handled by encrypting "IV, ciphertext block
 * 0, ..., ciphertext block n-2" with sm4_aesni_avx_crypt8() and XORing the
 * result with the ciphertext. A final partial block is handled with one
 * more keystream block when the current walk step is the last one.
 *
 * Return: the last status from skcipher_walk_virt()/skcipher_walk_done().
 */
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct sm4_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	struct skcipher_walk walk;
	int ret = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes > 0) {
		unsigned int left = walk.nbytes;
		const u8 *in = walk.src.virt.addr;
		u8 *out = walk.dst.virt.addr;

		kernel_fpu_begin();

		for (; left >= bsize; left -= bsize) {
			func(ctx->rkey_enc, out, in, walk.iv);
			out += bsize;
			in += bsize;
		}

		while (left >= SM4_BLOCK_SIZE) {
			u8 pad[SM4_BLOCK_SIZE * 8];
			unsigned int count = min(left >> 4, 8u);
			unsigned int chunk = count * SM4_BLOCK_SIZE;

			/* Cipher input: IV followed by the first count-1 blocks. */
			memcpy(pad, walk.iv, SM4_BLOCK_SIZE);
			if (count > 1)
				memcpy(&pad[SM4_BLOCK_SIZE], in,
				       chunk - SM4_BLOCK_SIZE);
			/* The last ciphertext block becomes the next IV. */
			memcpy(walk.iv, in + chunk - SM4_BLOCK_SIZE,
			       SM4_BLOCK_SIZE);

			sm4_aesni_avx_crypt8(ctx->rkey_enc, pad, pad, count);

			crypto_xor_cpy(out, in, pad, chunk);
			out += chunk;
			in += chunk;
			left -= chunk;
		}

		kernel_fpu_end();

		/* tail */
		if (left > 0 && walk.nbytes == walk.total) {
			u8 pad[SM4_BLOCK_SIZE];

			sm4_crypt_block(ctx->rkey_enc, pad, walk.iv);
			crypto_xor_cpy(out, in, pad, left);
			left = 0;
		}

		ret = skcipher_walk_done(&walk, left);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
|
||||
|
||||
static int cfb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_cfb_dec_blk8);
|
||||
}
|
||||
|
||||
/*
 * CTR en/decryption shared by the AVX and AVX2 drivers.
 *
 * @req:   skcipher request to process
 * @bsize: number of bytes @func consumes per call
 * @func:  bulk routine that processes @bsize bytes and advances the counter
 *
 * Chunks of at least @bsize bytes go to @func. The remaining whole blocks
 * (at most eight per pass) are handled by materialising the counter values
 * in a stack buffer, encrypting them with sm4_aesni_avx_crypt8() and XORing
 * the result with the source. A final partial block is handled with one
 * more keystream block when the current walk step is the last one.
 *
 * Return: the last status from skcipher_walk_virt()/skcipher_walk_done().
 */
int sm4_avx_ctr_crypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_enc, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			unsigned int i;	/* same signedness as nblocks */

			/* One counter value per block, advancing walk.iv. */
			for (i = 0; i < nblocks; i++) {
				memcpy(&keystream[i * SM4_BLOCK_SIZE],
				       walk.iv, SM4_BLOCK_SIZE);
				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
			}
			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
						keystream, nblocks);

			crypto_xor_cpy(dst, src, keystream,
					nblocks * SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();

		/* tail */
		if (walk.nbytes == walk.total && nbytes > 0) {
			u8 keystream[SM4_BLOCK_SIZE];

			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
			crypto_inc(walk.iv, SM4_BLOCK_SIZE);

			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);

			/* dst/src are not used after this, so no advance. */
			crypto_xor_cpy(dst, src, keystream, nbytes);
			nbytes = 0;
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
|
||||
|
||||
static int ctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
|
||||
sm4_aesni_avx_ctr_enc_blk8);
|
||||
}
|
||||
|
||||
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
|
||||
{
|
||||
.base = {
|
||||
.cra_name = "__ecb(sm4)",
|
||||
.cra_driver_name = "__ecb-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_avx_ecb_encrypt,
|
||||
.decrypt = sm4_avx_ecb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cbc(sm4)",
|
||||
.cra_driver_name = "__cbc-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cfb(sm4)",
|
||||
.cra_driver_name = "__cfb-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = sm4_cfb_encrypt,
|
||||
.decrypt = cfb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(sm4)",
|
||||
.cra_driver_name = "__ctr-sm4-aesni-avx",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = SM4_KEY_SIZE,
|
||||
.max_keysize = SM4_KEY_SIZE,
|
||||
.ivsize = SM4_BLOCK_SIZE,
|
||||
.chunksize = SM4_BLOCK_SIZE,
|
||||
.walksize = 8 * SM4_BLOCK_SIZE,
|
||||
.setkey = sm4_skcipher_setkey,
|
||||
.encrypt = ctr_crypt,
|
||||
.decrypt = ctr_crypt,
|
||||
}
|
||||
};
|
||||
|
||||
static struct simd_skcipher_alg *
|
||||
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
|
||||
|
||||
/*
 * Module init: refuse to load (-ENODEV) unless the CPU reports AVX, AES-NI
 * and OSXSAVE and the SSE/YMM xfeatures are available, then register the
 * algorithms together with their SIMD wrappers.
 */
static int __init sm4_init(void)
{
	const char *feature_name;

	if (!(boot_cpu_has(X86_FEATURE_AVX) &&
	      boot_cpu_has(X86_FEATURE_AES) &&
	      boot_cpu_has(X86_FEATURE_OSXSAVE))) {
		pr_info("AVX or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
				ARRAY_SIZE(sm4_aesni_avx_skciphers),
				simd_sm4_aesni_avx_skciphers);
}
|
||||
|
||||
/*
 * Module exit: unregister every entry of sm4_aesni_avx_skciphers together
 * with its SIMD wrapper stored in simd_sm4_aesni_avx_skciphers.
 */
static void __exit sm4_exit(void)
{
	const int count = ARRAY_SIZE(sm4_aesni_avx_skciphers);

	simd_unregister_skciphers(sm4_aesni_avx_skciphers, count,
				  simd_sm4_aesni_avx_skciphers);
}
|
||||
|
||||
module_init(sm4_init);
|
||||
module_exit(sm4_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
|
||||
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
|
||||
MODULE_ALIAS_CRYPTO("sm4");
|
||||
MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
|
@ -1547,6 +1547,7 @@ config CRYPTO_SERPENT_AVX2_X86_64
|
||||
config CRYPTO_SM4
|
||||
tristate "SM4 cipher algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
|
||||
|
||||
@ -1569,6 +1570,49 @@ config CRYPTO_SM4
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_SM4_AESNI_AVX_X86_64
|
||||
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_SIMD
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
|
||||
|
||||
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
|
||||
Organization of State Commercial Administration of China (OSCCA)
|
||||
as an authorized cryptographic algorithm for use within China.
|
||||
|
||||
This is an optimized SM4 implementation using the AES-NI/AVX/x86_64
|
||||
instruction set for block cipher. Through two affine transforms,
|
||||
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
|
||||
effect of instruction acceleration.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_SM4_AESNI_AVX2_X86_64
|
||||
tristate "SM4 cipher algorithm (x86_64/AES-NI/AVX2)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_SIMD
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_LIB_SM4
|
||||
select CRYPTO_SM4_AESNI_AVX_X86_64
|
||||
help
|
||||
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
|
||||
|
||||
SM4 (GBT.32907-2016) is a cryptographic standard issued by the
|
||||
Organization of State Commercial Administration of China (OSCCA)
|
||||
as an authorized cryptographic algorithm for use within China.
|
||||
|
||||
This is an optimized SM4 implementation using the AES-NI/AVX2/x86_64
|
||||
instruction set for block cipher. Through two affine transforms,
|
||||
we can use the AES S-Box to simulate the SM4 S-Box to achieve the
|
||||
effect of instruction acceleration.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_TEA
|
||||
tristate "TEA, XTEA and XETA cipher algorithms"
|
||||
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
|
||||
|
@ -74,7 +74,6 @@ obj-$(CONFIG_CRYPTO_NULL2) += crypto_null.o
|
||||
obj-$(CONFIG_CRYPTO_MD4) += md4.o
|
||||
obj-$(CONFIG_CRYPTO_MD5) += md5.o
|
||||
obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o
|
||||
obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
|
||||
|
@ -27,6 +27,7 @@
|
||||
#define _CRYPTO_ECC_H
|
||||
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
/* One digit is u64 qword. */
|
||||
#define ECC_CURVE_NIST_P192_DIGITS 3
|
||||
@ -46,13 +47,13 @@
|
||||
* @out: Output array
|
||||
* @ndigits: Number of digits to copy
|
||||
*/
|
||||
static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
|
||||
static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigits)
|
||||
{
|
||||
const __be64 *src = (__force __be64 *)in;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ndigits; i++)
|
||||
out[i] = be64_to_cpu(src[ndigits - 1 - i]);
|
||||
out[i] = get_unaligned_be64(&src[ndigits - 1 - i]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,9 +143,6 @@ sha512_transform(u64 *state, const u8 *input)
|
||||
|
||||
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
|
||||
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
|
||||
|
||||
/* erase our data */
|
||||
a = b = c = d = e = f = g = h = t1 = t2 = 0;
|
||||
}
|
||||
|
||||
static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
|
||||
|
@ -431,7 +431,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
|
||||
|
||||
static int skcipher_walk_first(struct skcipher_walk *walk)
|
||||
{
|
||||
if (WARN_ON_ONCE(in_irq()))
|
||||
if (WARN_ON_ONCE(in_hardirq()))
|
||||
return -EDEADLK;
|
||||
|
||||
walk->buffer = NULL;
|
||||
|
@ -16,191 +16,43 @@
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
static const u32 fk[4] = {
|
||||
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
|
||||
};
|
||||
|
||||
static const u8 sbox[256] = {
|
||||
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
|
||||
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
|
||||
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
|
||||
0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
|
||||
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
|
||||
0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
|
||||
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
|
||||
0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
|
||||
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
|
||||
0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
|
||||
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
|
||||
0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
|
||||
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
|
||||
0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
|
||||
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
|
||||
0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
|
||||
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
|
||||
0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
|
||||
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
|
||||
0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
|
||||
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
|
||||
0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
|
||||
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
|
||||
0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
|
||||
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
|
||||
0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
|
||||
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
|
||||
0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
|
||||
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
|
||||
0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
|
||||
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
|
||||
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
|
||||
};
|
||||
|
||||
static const u32 ck[] = {
|
||||
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
|
||||
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
|
||||
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
|
||||
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
|
||||
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
|
||||
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
|
||||
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
|
||||
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
|
||||
};
|
||||
|
||||
static u32 sm4_t_non_lin_sub(u32 x)
|
||||
{
|
||||
int i;
|
||||
u8 *b = (u8 *)&x;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
b[i] = sbox[b[i]];
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
static u32 sm4_key_lin_sub(u32 x)
|
||||
{
|
||||
return x ^ rol32(x, 13) ^ rol32(x, 23);
|
||||
|
||||
}
|
||||
|
||||
static u32 sm4_enc_lin_sub(u32 x)
|
||||
{
|
||||
return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
|
||||
}
|
||||
|
||||
static u32 sm4_key_sub(u32 x)
|
||||
{
|
||||
return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
|
||||
}
|
||||
|
||||
static u32 sm4_enc_sub(u32 x)
|
||||
{
|
||||
return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
|
||||
}
|
||||
|
||||
static u32 sm4_round(const u32 *x, const u32 rk)
|
||||
{
|
||||
return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
|
||||
* @ctx: The location where the computed key will be stored.
|
||||
* @in_key: The supplied key.
|
||||
* @key_len: The length of the supplied key.
|
||||
*
|
||||
* Returns 0 on success. The function fails only if an invalid key size (or
|
||||
* pointer) is supplied.
|
||||
*/
|
||||
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
u32 rk[4], t;
|
||||
const u32 *key = (u32 *)in_key;
|
||||
int i;
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
|
||||
|
||||
for (i = 0; i < 32; ++i) {
|
||||
t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
|
||||
ctx->rkey_enc[i] = t;
|
||||
rk[0] = rk[1];
|
||||
rk[1] = rk[2];
|
||||
rk[2] = rk[3];
|
||||
rk[3] = t;
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; ++i)
|
||||
ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
|
||||
|
||||
/**
|
||||
* crypto_sm4_set_key - Set the SM4 key.
|
||||
* sm4_setkey - Set the SM4 key.
|
||||
* @tfm: The %crypto_tfm that is used in the context.
|
||||
* @in_key: The input key.
|
||||
* @key_len: The size of the key.
|
||||
*
|
||||
* This function uses crypto_sm4_expand_key() to expand the key.
|
||||
* &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
|
||||
* This function uses sm4_expandkey() to expand the key.
|
||||
* &sm4_ctx _must_ be the private data embedded in @tfm which is
|
||||
* retrieved with crypto_tfm_ctx().
|
||||
*
|
||||
* Return: 0 on success; -EINVAL on failure (only happens for bad key lengths)
|
||||
*/
|
||||
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
return crypto_sm4_expand_key(ctx, in_key, key_len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
|
||||
|
||||
static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
|
||||
{
|
||||
u32 x[4], i, t;
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
x[i] = get_unaligned_be32(&in[i]);
|
||||
|
||||
for (i = 0; i < 32; ++i) {
|
||||
t = sm4_round(x, rk[i]);
|
||||
x[0] = x[1];
|
||||
x[1] = x[2];
|
||||
x[2] = x[3];
|
||||
x[3] = t;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
put_unaligned_be32(x[3 - i], &out[i]);
|
||||
return sm4_expandkey(ctx, in_key, key_len);
|
||||
}
|
||||
|
||||
/* encrypt a block of text */
|
||||
|
||||
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
|
||||
sm4_crypt_block(ctx->rkey_enc, out, in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
|
||||
|
||||
/* decrypt a block of text */
|
||||
|
||||
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
|
||||
sm4_crypt_block(ctx->rkey_dec, out, in);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
|
||||
|
||||
static struct crypto_alg sm4_alg = {
|
||||
.cra_name = "sm4",
|
||||
@ -208,15 +60,15 @@ static struct crypto_alg sm4_alg = {
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_sm4_ctx),
|
||||
.cra_ctxsize = sizeof(struct sm4_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = SM4_KEY_SIZE,
|
||||
.cia_max_keysize = SM4_KEY_SIZE,
|
||||
.cia_setkey = crypto_sm4_set_key,
|
||||
.cia_encrypt = crypto_sm4_encrypt,
|
||||
.cia_decrypt = crypto_sm4_decrypt
|
||||
.cia_setkey = sm4_setkey,
|
||||
.cia_encrypt = sm4_encrypt,
|
||||
.cia_decrypt = sm4_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
100
crypto/tcrypt.c
100
crypto/tcrypt.c
@ -77,7 +77,7 @@ static const char *check[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static const int block_sizes[] = { 16, 64, 256, 1024, 1420, 4096, 0 };
|
||||
static const int block_sizes[] = { 16, 64, 128, 256, 1024, 1420, 4096, 0 };
|
||||
static const int aead_sizes[] = { 16, 64, 256, 512, 1024, 1420, 4096, 8192, 0 };
|
||||
|
||||
#define XBUFSIZE 8
|
||||
@ -290,6 +290,11 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
|
||||
}
|
||||
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
|
||||
ret);
|
||||
goto out_free_tfm;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_mb; ++i)
|
||||
if (testmgr_alloc_buf(data[i].xbuf)) {
|
||||
@ -315,7 +320,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
|
||||
for (i = 0; i < num_mb; ++i) {
|
||||
data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
|
||||
if (!data[i].req) {
|
||||
pr_err("alg: skcipher: Failed to allocate request for %s\n",
|
||||
pr_err("alg: aead: Failed to allocate request for %s\n",
|
||||
algo);
|
||||
while (i--)
|
||||
aead_request_free(data[i].req);
|
||||
@ -567,13 +572,19 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
sgout = &sg[9];
|
||||
|
||||
tfm = crypto_alloc_aead(algo, 0, 0);
|
||||
|
||||
if (IS_ERR(tfm)) {
|
||||
pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
|
||||
PTR_ERR(tfm));
|
||||
goto out_notfm;
|
||||
}
|
||||
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("alg: aead: Failed to setauthsize for %s: %d\n", algo,
|
||||
ret);
|
||||
goto out_noreq;
|
||||
}
|
||||
|
||||
crypto_init_wait(&wait);
|
||||
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
|
||||
get_driver_name(crypto_aead, tfm), e);
|
||||
@ -611,8 +622,13 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = crypto_aead_setkey(tfm, key, *keysize);
|
||||
ret = crypto_aead_setauthsize(tfm, authsize);
|
||||
if (ret) {
|
||||
pr_err("setkey() failed flags=%x: %d\n",
|
||||
crypto_aead_get_flags(tfm), ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
iv_len = crypto_aead_ivsize(tfm);
|
||||
if (iv_len)
|
||||
@ -622,15 +638,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
|
||||
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
|
||||
i, *keysize * 8, bs);
|
||||
|
||||
|
||||
memset(tvmem[0], 0xff, PAGE_SIZE);
|
||||
|
||||
if (ret) {
|
||||
pr_err("setkey() failed flags=%x\n",
|
||||
crypto_aead_get_flags(tfm));
|
||||
goto out;
|
||||
}
|
||||
|
||||
sg_init_aead(sg, xbuf, bs + (enc ? 0 : authsize),
|
||||
assoc, aad_size);
|
||||
|
||||
@ -1907,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("streebog512");
|
||||
break;
|
||||
|
||||
case 55:
|
||||
ret += tcrypt_test("gcm(sm4)");
|
||||
break;
|
||||
|
||||
case 56:
|
||||
ret += tcrypt_test("ccm(sm4)");
|
||||
break;
|
||||
|
||||
case 100:
|
||||
ret += tcrypt_test("hmac(md5)");
|
||||
break;
|
||||
@ -1998,6 +2015,15 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
case 157:
|
||||
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
|
||||
break;
|
||||
|
||||
case 158:
|
||||
ret += tcrypt_test("cbcmac(sm4)");
|
||||
break;
|
||||
|
||||
case 159:
|
||||
ret += tcrypt_test("cmac(sm4)");
|
||||
break;
|
||||
|
||||
case 181:
|
||||
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
|
||||
break;
|
||||
@ -2031,6 +2057,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
case 191:
|
||||
ret += tcrypt_test("ecb(sm4)");
|
||||
ret += tcrypt_test("cbc(sm4)");
|
||||
ret += tcrypt_test("cfb(sm4)");
|
||||
ret += tcrypt_test("ctr(sm4)");
|
||||
break;
|
||||
case 200:
|
||||
@ -2289,6 +2316,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
speed_template_16);
|
||||
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
|
||||
@ -2322,6 +2353,34 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
break;
|
||||
|
||||
case 222:
|
||||
test_aead_speed("gcm(sm4)", ENCRYPT, sec,
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
test_aead_speed("gcm(sm4)", DECRYPT, sec,
|
||||
NULL, 0, 16, 8, speed_template_16);
|
||||
break;
|
||||
|
||||
case 223:
|
||||
test_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec,
|
||||
NULL, 0, 16, 16, aead_speed_template_19);
|
||||
test_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec,
|
||||
NULL, 0, 16, 16, aead_speed_template_19);
|
||||
break;
|
||||
|
||||
case 224:
|
||||
test_mb_aead_speed("gcm(sm4)", ENCRYPT, sec, NULL, 0, 16, 8,
|
||||
speed_template_16, num_mb);
|
||||
test_mb_aead_speed("gcm(sm4)", DECRYPT, sec, NULL, 0, 16, 8,
|
||||
speed_template_16, num_mb);
|
||||
break;
|
||||
|
||||
case 225:
|
||||
test_mb_aead_speed("rfc4309(ccm(sm4))", ENCRYPT, sec, NULL, 0,
|
||||
16, 16, aead_speed_template_19, num_mb);
|
||||
test_mb_aead_speed("rfc4309(ccm(sm4))", DECRYPT, sec, NULL, 0,
|
||||
16, 16, aead_speed_template_19, num_mb);
|
||||
break;
|
||||
|
||||
case 300:
|
||||
if (alg) {
|
||||
test_hash_speed(alg, sec, generic_hash_speed_template);
|
||||
@ -2757,6 +2816,25 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
speed_template_8_32);
|
||||
break;
|
||||
|
||||
case 518:
|
||||
test_acipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
break;
|
||||
|
||||
case 600:
|
||||
test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16_24_32, num_mb);
|
||||
|
@ -4450,6 +4450,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(aes_cbcmac_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "cbcmac(sm4)",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(sm4_cbcmac_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "ccm(aes)",
|
||||
.generic_driver = "ccm_base(ctr(aes-generic),cbcmac(aes-generic))",
|
||||
@ -4461,6 +4467,16 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.einval_allowed = 1,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "ccm(sm4)",
|
||||
.generic_driver = "ccm_base(ctr(sm4-generic),cbcmac(sm4-generic))",
|
||||
.test = alg_test_aead,
|
||||
.suite = {
|
||||
.aead = {
|
||||
____VECS(sm4_ccm_tv_template),
|
||||
.einval_allowed = 1,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.alg = "cfb(aes)",
|
||||
.test = alg_test_skcipher,
|
||||
@ -4494,6 +4510,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(des3_ede_cmac64_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "cmac(sm4)",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(sm4_cmac128_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "compress_null",
|
||||
.test = alg_test_null,
|
||||
@ -4967,6 +4989,13 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.aead = __VECS(aes_gcm_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "gcm(sm4)",
|
||||
.generic_driver = "gcm_base(ctr(sm4-generic),ghash-generic)",
|
||||
.test = alg_test_aead,
|
||||
.suite = {
|
||||
.aead = __VECS(sm4_gcm_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "ghash",
|
||||
.test = alg_test_hash,
|
||||
|
148
crypto/testmgr.h
148
crypto/testmgr.h
@ -13328,6 +13328,154 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
|
||||
}
|
||||
};
|
||||
|
||||
static const struct aead_testvec sm4_gcm_tv_template[] = {
|
||||
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
|
||||
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
|
||||
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
|
||||
.klen = 16,
|
||||
.iv = "\x00\x00\x12\x34\x56\x78\x00\x00"
|
||||
"\x00\x00\xAB\xCD",
|
||||
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
|
||||
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
|
||||
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
|
||||
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
|
||||
.plen = 64,
|
||||
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xAB\xAD\xDA\xD2",
|
||||
.alen = 20,
|
||||
.ctext = "\x17\xF3\x99\xF0\x8C\x67\xD5\xEE"
|
||||
"\x19\xD0\xDC\x99\x69\xC4\xBB\x7D"
|
||||
"\x5F\xD4\x6F\xD3\x75\x64\x89\x06"
|
||||
"\x91\x57\xB2\x82\xBB\x20\x07\x35"
|
||||
"\xD8\x27\x10\xCA\x5C\x22\xF0\xCC"
|
||||
"\xFA\x7C\xBF\x93\xD4\x96\xAC\x15"
|
||||
"\xA5\x68\x34\xCB\xCF\x98\xC3\x97"
|
||||
"\xB4\x02\x4A\x26\x91\x23\x3B\x8D"
|
||||
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
|
||||
"\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
|
||||
.clen = 80,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct aead_testvec sm4_ccm_tv_template[] = {
|
||||
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.2 */
|
||||
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
|
||||
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
|
||||
.klen = 16,
|
||||
.iv = "\x02\x00\x00\x12\x34\x56\x78\x00"
|
||||
"\x00\x00\x00\xAB\xCD\x00\x00\x00",
|
||||
.ptext = "\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA"
|
||||
"\xBB\xBB\xBB\xBB\xBB\xBB\xBB\xBB"
|
||||
"\xCC\xCC\xCC\xCC\xCC\xCC\xCC\xCC"
|
||||
"\xDD\xDD\xDD\xDD\xDD\xDD\xDD\xDD"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
|
||||
"\xEE\xEE\xEE\xEE\xEE\xEE\xEE\xEE"
|
||||
"\xAA\xAA\xAA\xAA\xAA\xAA\xAA\xAA",
|
||||
.plen = 64,
|
||||
.assoc = "\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xFE\xED\xFA\xCE\xDE\xAD\xBE\xEF"
|
||||
"\xAB\xAD\xDA\xD2",
|
||||
.alen = 20,
|
||||
.ctext = "\x48\xAF\x93\x50\x1F\xA6\x2A\xDB"
|
||||
"\xCD\x41\x4C\xCE\x60\x34\xD8\x95"
|
||||
"\xDD\xA1\xBF\x8F\x13\x2F\x04\x20"
|
||||
"\x98\x66\x15\x72\xE7\x48\x30\x94"
|
||||
"\xFD\x12\xE5\x18\xCE\x06\x2C\x98"
|
||||
"\xAC\xEE\x28\xD9\x5D\xF4\x41\x6B"
|
||||
"\xED\x31\xA2\xF0\x44\x76\xC1\x8B"
|
||||
"\xB4\x0C\x84\xA7\x4B\x97\xDC\x5B"
|
||||
"\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
|
||||
"\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
|
||||
.clen = 80,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hash_testvec sm4_cbcmac_tv_template[] = {
|
||||
{
|
||||
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
|
||||
"\x77\x66\x55\x44\x33\x22\x11\x00",
|
||||
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
|
||||
.digest = "\x97\xb4\x75\x8f\x84\x92\x3d\x3f"
|
||||
"\x86\x81\x0e\x0e\xea\x14\x6d\x73",
|
||||
.psize = 16,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
|
||||
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
|
||||
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
|
||||
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
|
||||
"\xee",
|
||||
.digest = "\xc7\xdb\x17\x71\xa1\x5c\x0d\x22"
|
||||
"\xa3\x39\x3a\x31\x88\x91\x49\xa1",
|
||||
.psize = 33,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
|
||||
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
|
||||
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
|
||||
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
|
||||
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
|
||||
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
|
||||
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
|
||||
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
|
||||
.digest = "\x9b\x07\x88\x7f\xd5\x95\x23\x12"
|
||||
"\x64\x0a\x66\x7f\x4e\x25\xca\xd0",
|
||||
.psize = 63,
|
||||
.ksize = 16,
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hash_testvec sm4_cmac128_tv_template[] = {
|
||||
{
|
||||
.key = "\xff\xee\xdd\xcc\xbb\xaa\x99\x88"
|
||||
"\x77\x66\x55\x44\x33\x22\x11\x00",
|
||||
.plaintext = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xba\x98\x76\x54\x32\x10",
|
||||
.digest = "\x00\xd4\x63\xb4\x9a\xf3\x52\xe2"
|
||||
"\x74\xa9\x00\x55\x13\x54\x2a\xd1",
|
||||
.psize = 16,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
|
||||
"\xbb\xbb\xbb\xbb\xbb\xbb\xbb\xbb"
|
||||
"\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc"
|
||||
"\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
|
||||
"\xee",
|
||||
.digest = "\x8a\x8a\xe9\xc0\xc8\x97\x0e\x85"
|
||||
"\x21\x57\x02\x10\x1a\xbf\x9c\xc6",
|
||||
.psize = 33,
|
||||
.ksize = 16,
|
||||
}, {
|
||||
.key = "\x01\x23\x45\x67\x89\xab\xcd\xef"
|
||||
"\xfe\xdc\xBA\x98\x76\x54\x32\x10",
|
||||
.plaintext = "\xfb\xd1\xbe\x92\x7e\x50\x3f\x16"
|
||||
"\xf9\xdd\xbe\x91\x73\x53\x37\x1a"
|
||||
"\xfe\xdd\xba\x97\x7e\x53\x3c\x1c"
|
||||
"\xfe\xd7\xbf\x9c\x75\x5f\x3e\x11"
|
||||
"\xf0\xd8\xbc\x96\x73\x5c\x34\x11"
|
||||
"\xf5\xdb\xb1\x99\x7a\x5a\x32\x1f"
|
||||
"\xf6\xdf\xb4\x95\x7f\x5f\x3b\x17"
|
||||
"\xfd\xdb\xb1\x9b\x76\x5c\x37",
|
||||
.digest = "\x5f\x14\xc9\xa9\x20\xb2\xb4\xf0"
|
||||
"\x76\xe0\xd8\xd6\xdc\x4f\xe1\xbc",
|
||||
.psize = 63,
|
||||
.ksize = 16,
|
||||
}
|
||||
};
|
||||
|
||||
/* Cast6 test vectors from RFC 2612 */
|
||||
static const struct cipher_testvec cast6_tv_template[] = {
|
||||
{
|
||||
|
@ -775,7 +775,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = {
|
||||
0xca2dbf07ad5a8333ULL,
|
||||
};
|
||||
|
||||
/**
|
||||
/*
|
||||
* The core Whirlpool transform.
|
||||
*/
|
||||
|
||||
|
@ -524,6 +524,20 @@ config HW_RANDOM_XIPHERA
|
||||
To compile this driver as a module, choose M here: the
|
||||
module will be called xiphera-trng.
|
||||
|
||||
config HW_RANDOM_ARM_SMCCC_TRNG
|
||||
tristate "Arm SMCCC TRNG firmware interface support"
|
||||
depends on HAVE_ARM_SMCCC_DISCOVERY
|
||||
default HW_RANDOM
|
||||
help
|
||||
Say 'Y' to enable the True Random Number Generator driver using
|
||||
the Arm SMCCC TRNG firmware interface. This reads entropy from
|
||||
higher exception levels (firmware, hypervisor). Uses SMCCC for
|
||||
communicating with the firmware:
|
||||
https://developer.arm.com/documentation/den0098/latest/
|
||||
|
||||
To compile this driver as a module, choose M here: the
|
||||
module will be called arm_smccc_trng.
|
||||
|
||||
endif # HW_RANDOM
|
||||
|
||||
config UML_RANDOM
|
||||
|
@ -45,3 +45,4 @@ obj-$(CONFIG_HW_RANDOM_OPTEE) += optee-rng.o
|
||||
obj-$(CONFIG_HW_RANDOM_NPCM) += npcm-rng.o
|
||||
obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
|
||||
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
|
||||
obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
|
||||
|
@ -124,7 +124,7 @@ static struct hwrng amd_rng = {
|
||||
.read = amd_rng_read,
|
||||
};
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init amd_rng_mod_init(void)
|
||||
{
|
||||
int err;
|
||||
struct pci_dev *pdev = NULL;
|
||||
@ -188,7 +188,7 @@ static int __init mod_init(void)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit amd_rng_mod_exit(void)
|
||||
{
|
||||
struct amd768_priv *priv;
|
||||
|
||||
@ -203,8 +203,8 @@ static void __exit mod_exit(void)
|
||||
kfree(priv);
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(amd_rng_mod_init);
|
||||
module_exit(amd_rng_mod_exit);
|
||||
|
||||
MODULE_AUTHOR("The Linux Kernel team");
|
||||
MODULE_DESCRIPTION("H/W RNG driver for AMD chipsets");
|
||||
|
123
drivers/char/hw_random/arm_smccc_trng.c
Normal file
123
drivers/char/hw_random/arm_smccc_trng.c
Normal file
@ -0,0 +1,123 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Randomness driver for the ARM SMCCC TRNG Firmware Interface
|
||||
* https://developer.arm.com/documentation/den0098/latest/
|
||||
*
|
||||
* Copyright (C) 2020 Arm Ltd.
|
||||
*
|
||||
* The ARM TRNG firmware interface specifies a protocol to read entropy
|
||||
* from a higher exception level, to abstract from any machine specific
|
||||
* implementations and allow easier use in hypervisors.
|
||||
*
|
||||
* The firmware interface is realised using the SMCCC specification.
|
||||
*/
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/hw_random.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
|
||||
#ifdef CONFIG_ARM64
|
||||
#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND64
|
||||
#define MAX_BITS_PER_CALL (3 * 64UL)
|
||||
#else
|
||||
#define ARM_SMCCC_TRNG_RND ARM_SMCCC_TRNG_RND32
|
||||
#define MAX_BITS_PER_CALL (3 * 32UL)
|
||||
#endif
|
||||
|
||||
/* We don't want to allow the firmware to stall us forever. */
|
||||
#define SMCCC_TRNG_MAX_TRIES 20
|
||||
|
||||
#define SMCCC_RET_TRNG_INVALID_PARAMETER -2
|
||||
#define SMCCC_RET_TRNG_NO_ENTROPY -3
|
||||
|
||||
static int copy_from_registers(char *buf, struct arm_smccc_res *res,
|
||||
size_t bytes)
|
||||
{
|
||||
unsigned int chunk, copied;
|
||||
|
||||
if (bytes == 0)
|
||||
return 0;
|
||||
|
||||
chunk = min(bytes, sizeof(long));
|
||||
memcpy(buf, &res->a3, chunk);
|
||||
copied = chunk;
|
||||
if (copied >= bytes)
|
||||
return copied;
|
||||
|
||||
chunk = min((bytes - copied), sizeof(long));
|
||||
memcpy(&buf[copied], &res->a2, chunk);
|
||||
copied += chunk;
|
||||
if (copied >= bytes)
|
||||
return copied;
|
||||
|
||||
chunk = min((bytes - copied), sizeof(long));
|
||||
memcpy(&buf[copied], &res->a1, chunk);
|
||||
|
||||
return copied + chunk;
|
||||
}
|
||||
|
||||
/*
 * hwrng read callback: fill @data with up to @max bytes obtained through
 * repeated ARM_SMCCC_TRNG_RND calls.
 *
 * Each call asks for at most MAX_BITS_PER_CALL bits. When the firmware
 * answers SMCCC_RET_TRNG_NO_ENTROPY the request is retried only if @wait is
 * set, and at most SMCCC_TRNG_MAX_TRIES times in a row; otherwise the bytes
 * gathered so far are returned.
 *
 * @rng is not used.
 *
 * Return: number of bytes stored in @data (possibly fewer than @max), or
 * -EIO when the firmware reports any other status.
 */
static int smccc_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	struct arm_smccc_res res;
	u8 *buf = data;
	unsigned int copied = 0;
	int tries = 0;

	while (copied < max) {
		size_t bits = min_t(size_t, (max - copied) * BITS_PER_BYTE,
				    MAX_BITS_PER_CALL);

		arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND, bits, &res);

		/*
		 * Dispatch on the status directly. NO_ENTROPY is a negative
		 * value, so a blanket "negative means failure" test ahead of
		 * the switch would make the retry arm below unreachable.
		 */
		switch ((int)res.a0) {
		case SMCCC_RET_SUCCESS:
			copied += copy_from_registers(buf + copied, &res,
						      bits / BITS_PER_BYTE);
			tries = 0;
			break;
		case SMCCC_RET_TRNG_NO_ENTROPY:
			if (!wait)
				return copied;
			tries++;
			if (tries >= SMCCC_TRNG_MAX_TRIES)
				return copied;
			cond_resched();
			break;
		default:
			/*
			 * Any other status is unexpected here; report a
			 * proper errno rather than a raw SMCCC code, and
			 * never loop on a status we do not understand.
			 */
			return -EIO;
		}
	}

	return copied;
}
|
||||
|
||||
static int smccc_trng_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct hwrng *trng;
|
||||
|
||||
trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
|
||||
if (!trng)
|
||||
return -ENOMEM;
|
||||
|
||||
trng->name = "smccc_trng";
|
||||
trng->read = smccc_trng_read;
|
||||
|
||||
platform_set_drvdata(pdev, trng);
|
||||
|
||||
return devm_hwrng_register(&pdev->dev, trng);
|
||||
}
|
||||
|
||||
static struct platform_driver smccc_trng_driver = {
|
||||
.driver = {
|
||||
.name = "smccc_trng",
|
||||
},
|
||||
.probe = smccc_trng_probe,
|
||||
};
|
||||
module_platform_driver(smccc_trng_driver);
|
||||
|
||||
MODULE_ALIAS("platform:smccc_trng");
|
||||
MODULE_AUTHOR("Andre Przywara");
|
||||
MODULE_LICENSE("GPL");
|
@ -83,7 +83,7 @@ static struct hwrng geode_rng = {
|
||||
};
|
||||
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init geode_rng_init(void)
|
||||
{
|
||||
int err = -ENODEV;
|
||||
struct pci_dev *pdev = NULL;
|
||||
@ -124,7 +124,7 @@ static int __init mod_init(void)
|
||||
goto out;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit geode_rng_exit(void)
|
||||
{
|
||||
void __iomem *mem = (void __iomem *)geode_rng.priv;
|
||||
|
||||
@ -132,8 +132,8 @@ static void __exit mod_exit(void)
|
||||
iounmap(mem);
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(geode_rng_init);
|
||||
module_exit(geode_rng_exit);
|
||||
|
||||
MODULE_DESCRIPTION("H/W RNG driver for AMD Geode LX CPUs");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -325,7 +325,7 @@ PFX "RNG, try using the 'no_fwh_detect' option.\n";
|
||||
}
|
||||
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init intel_rng_mod_init(void)
|
||||
{
|
||||
int err = -ENODEV;
|
||||
int i;
|
||||
@ -403,7 +403,7 @@ static int __init mod_init(void)
|
||||
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit intel_rng_mod_exit(void)
|
||||
{
|
||||
void __iomem *mem = (void __iomem *)intel_rng.priv;
|
||||
|
||||
@ -411,8 +411,8 @@ static void __exit mod_exit(void)
|
||||
iounmap(mem);
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(intel_rng_mod_init);
|
||||
module_exit(intel_rng_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("H/W RNG driver for Intel chipsets");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -192,7 +192,7 @@ static struct hwrng via_rng = {
|
||||
};
|
||||
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init via_rng_mod_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -209,13 +209,13 @@ static int __init mod_init(void)
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
module_init(mod_init);
|
||||
module_init(via_rng_mod_init);
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit via_rng_mod_exit(void)
|
||||
{
|
||||
hwrng_unregister(&via_rng);
|
||||
}
|
||||
module_exit(mod_exit);
|
||||
module_exit(via_rng_mod_exit);
|
||||
|
||||
static struct x86_cpu_id __maybe_unused via_rng_cpu_id[] = {
|
||||
X86_MATCH_FEATURE(X86_FEATURE_XSTORE, NULL),
|
||||
|
@ -26,8 +26,7 @@ void sun8i_ce_prng_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
memzero_explicit(ctx->seed, ctx->slen);
|
||||
kfree(ctx->seed);
|
||||
kfree_sensitive(ctx->seed);
|
||||
ctx->seed = NULL;
|
||||
ctx->slen = 0;
|
||||
}
|
||||
@ -38,8 +37,7 @@ int sun8i_ce_prng_seed(struct crypto_rng *tfm, const u8 *seed,
|
||||
struct sun8i_ce_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
|
||||
|
||||
if (ctx->seed && ctx->slen != slen) {
|
||||
memzero_explicit(ctx->seed, ctx->slen);
|
||||
kfree(ctx->seed);
|
||||
kfree_sensitive(ctx->seed);
|
||||
ctx->slen = 0;
|
||||
ctx->seed = NULL;
|
||||
}
|
||||
@ -157,9 +155,8 @@ int sun8i_ce_prng_generate(struct crypto_rng *tfm, const u8 *src,
|
||||
memcpy(dst, d, dlen);
|
||||
memcpy(ctx->seed, d + dlen, ctx->slen);
|
||||
}
|
||||
memzero_explicit(d, todo);
|
||||
err_iv:
|
||||
kfree(d);
|
||||
kfree_sensitive(d);
|
||||
err_mem:
|
||||
return err;
|
||||
}
|
||||
|
@ -95,9 +95,8 @@ static int sun8i_ce_trng_read(struct hwrng *rng, void *data, size_t max, bool wa
|
||||
memcpy(data, d, max);
|
||||
err = max;
|
||||
}
|
||||
memzero_explicit(d, todo);
|
||||
err_dst:
|
||||
kfree(d);
|
||||
kfree_sensitive(d);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -20,8 +20,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed,
|
||||
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm);
|
||||
|
||||
if (ctx->seed && ctx->slen != slen) {
|
||||
memzero_explicit(ctx->seed, ctx->slen);
|
||||
kfree(ctx->seed);
|
||||
kfree_sensitive(ctx->seed);
|
||||
ctx->slen = 0;
|
||||
ctx->seed = NULL;
|
||||
}
|
||||
@ -48,8 +47,7 @@ void sun8i_ss_prng_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct sun8i_ss_rng_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
memzero_explicit(ctx->seed, ctx->slen);
|
||||
kfree(ctx->seed);
|
||||
kfree_sensitive(ctx->seed);
|
||||
ctx->seed = NULL;
|
||||
ctx->slen = 0;
|
||||
}
|
||||
@ -167,9 +165,8 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
|
||||
/* Update seed */
|
||||
memcpy(ctx->seed, d + dlen, ctx->slen);
|
||||
}
|
||||
memzero_explicit(d, todo);
|
||||
err_free:
|
||||
kfree(d);
|
||||
kfree_sensitive(d);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -143,6 +143,7 @@ struct atmel_aes_xts_ctx {
|
||||
struct atmel_aes_base_ctx base;
|
||||
|
||||
u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
|
||||
struct crypto_skcipher *fallback_tfm;
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
|
||||
@ -155,6 +156,7 @@ struct atmel_aes_authenc_ctx {
|
||||
struct atmel_aes_reqctx {
|
||||
unsigned long mode;
|
||||
u8 lastc[AES_BLOCK_SIZE];
|
||||
struct skcipher_request fallback_req;
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
|
||||
@ -418,24 +420,15 @@ static inline size_t atmel_aes_padlen(size_t len, size_t block_size)
|
||||
return len ? block_size - len : 0;
|
||||
}
|
||||
|
||||
static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_base_ctx *ctx)
|
||||
static struct atmel_aes_dev *atmel_aes_dev_alloc(struct atmel_aes_base_ctx *ctx)
|
||||
{
|
||||
struct atmel_aes_dev *aes_dd = NULL;
|
||||
struct atmel_aes_dev *tmp;
|
||||
struct atmel_aes_dev *aes_dd;
|
||||
|
||||
spin_lock_bh(&atmel_aes.lock);
|
||||
if (!ctx->dd) {
|
||||
list_for_each_entry(tmp, &atmel_aes.dev_list, list) {
|
||||
aes_dd = tmp;
|
||||
break;
|
||||
}
|
||||
ctx->dd = aes_dd;
|
||||
} else {
|
||||
aes_dd = ctx->dd;
|
||||
}
|
||||
|
||||
/* One AES IP per SoC. */
|
||||
aes_dd = list_first_entry_or_null(&atmel_aes.dev_list,
|
||||
struct atmel_aes_dev, list);
|
||||
spin_unlock_bh(&atmel_aes.lock);
|
||||
|
||||
return aes_dd;
|
||||
}
|
||||
|
||||
@ -967,7 +960,6 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
|
||||
ctx = crypto_tfm_ctx(areq->tfm);
|
||||
|
||||
dd->areq = areq;
|
||||
dd->ctx = ctx;
|
||||
start_async = (areq != new_areq);
|
||||
dd->is_async = start_async;
|
||||
|
||||
@ -1083,12 +1075,48 @@ static int atmel_aes_ctr_start(struct atmel_aes_dev *dd)
|
||||
return atmel_aes_ctr_transfer(dd);
|
||||
}
|
||||
|
||||
/*
 * Hand an XTS request over to the fallback skcipher.
 *
 * The fallback sub-request embedded in the request context is bound to
 * the context's fallback transform and given the caller's flags,
 * completion callback, callback data, scatterlists, length and IV. It is
 * then submitted for encryption when @enc is true, decryption otherwise.
 *
 * Returns whatever the fallback encrypt/decrypt call returns.
 */
static int atmel_aes_xts_fallback(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->fallback_req;

	skcipher_request_set_tfm(subreq, ctx->fallback_tfm);
	skcipher_request_set_callback(subreq, req->base.flags,
				      req->base.complete, req->base.data);
	skcipher_request_set_crypt(subreq, req->src, req->dst,
				   req->cryptlen, req->iv);

	if (enc)
		return crypto_skcipher_encrypt(subreq);

	return crypto_skcipher_decrypt(subreq);
}
|
||||
|
||||
static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
{
|
||||
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
|
||||
struct atmel_aes_base_ctx *ctx = crypto_skcipher_ctx(skcipher);
|
||||
struct atmel_aes_reqctx *rctx;
|
||||
struct atmel_aes_dev *dd;
|
||||
u32 opmode = mode & AES_FLAGS_OPMODE_MASK;
|
||||
|
||||
if (opmode == AES_FLAGS_XTS) {
|
||||
if (req->cryptlen < XTS_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!IS_ALIGNED(req->cryptlen, XTS_BLOCK_SIZE))
|
||||
return atmel_aes_xts_fallback(req,
|
||||
mode & AES_FLAGS_ENCRYPT);
|
||||
}
|
||||
|
||||
/*
|
||||
* ECB, CBC, CFB, OFB or CTR mode require the plaintext and ciphertext
|
||||
* to have a positive integer length.
|
||||
*/
|
||||
if (!req->cryptlen && opmode != AES_FLAGS_XTS)
|
||||
return 0;
|
||||
|
||||
if ((opmode == AES_FLAGS_ECB || opmode == AES_FLAGS_CBC) &&
|
||||
!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(skcipher)))
|
||||
return -EINVAL;
|
||||
|
||||
switch (mode & AES_FLAGS_OPMODE_MASK) {
|
||||
case AES_FLAGS_CFB8:
|
||||
@ -1113,14 +1141,10 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
}
|
||||
ctx->is_aead = false;
|
||||
|
||||
dd = atmel_aes_find_dev(ctx);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
rctx = skcipher_request_ctx(req);
|
||||
rctx->mode = mode;
|
||||
|
||||
if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB &&
|
||||
if (opmode != AES_FLAGS_ECB &&
|
||||
!(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) {
|
||||
unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
|
||||
|
||||
@ -1130,7 +1154,7 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
ivsize, 0);
|
||||
}
|
||||
|
||||
return atmel_aes_handle_queue(dd, &req->base);
|
||||
return atmel_aes_handle_queue(ctx->dd, &req->base);
|
||||
}
|
||||
|
||||
static int atmel_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
@ -1242,8 +1266,15 @@ static int atmel_aes_ctr_decrypt(struct skcipher_request *req)
|
||||
static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
dd = atmel_aes_dev_alloc(&ctx->base);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
|
||||
ctx->base.dd = dd;
|
||||
ctx->base.dd->ctx = &ctx->base;
|
||||
ctx->base.start = atmel_aes_start;
|
||||
|
||||
return 0;
|
||||
@ -1252,8 +1283,15 @@ static int atmel_aes_init_tfm(struct crypto_skcipher *tfm)
|
||||
static int atmel_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct atmel_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
dd = atmel_aes_dev_alloc(&ctx->base);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
|
||||
ctx->base.dd = dd;
|
||||
ctx->base.dd->ctx = &ctx->base;
|
||||
ctx->base.start = atmel_aes_ctr_start;
|
||||
|
||||
return 0;
|
||||
@ -1290,7 +1328,7 @@ static struct skcipher_alg aes_algs[] = {
|
||||
{
|
||||
.base.cra_name = "ofb(aes)",
|
||||
.base.cra_driver_name = "atmel-ofb-aes",
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct atmel_aes_ctx),
|
||||
|
||||
.init = atmel_aes_init_tfm,
|
||||
@ -1691,20 +1729,15 @@ static int atmel_aes_gcm_crypt(struct aead_request *req,
|
||||
{
|
||||
struct atmel_aes_base_ctx *ctx;
|
||||
struct atmel_aes_reqctx *rctx;
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
|
||||
ctx->block_size = AES_BLOCK_SIZE;
|
||||
ctx->is_aead = true;
|
||||
|
||||
dd = atmel_aes_find_dev(ctx);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
rctx = aead_request_ctx(req);
|
||||
rctx->mode = AES_FLAGS_GCM | mode;
|
||||
|
||||
return atmel_aes_handle_queue(dd, &req->base);
|
||||
return atmel_aes_handle_queue(ctx->dd, &req->base);
|
||||
}
|
||||
|
||||
static int atmel_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
|
||||
@ -1742,8 +1775,15 @@ static int atmel_aes_gcm_decrypt(struct aead_request *req)
|
||||
static int atmel_aes_gcm_init(struct crypto_aead *tfm)
|
||||
{
|
||||
struct atmel_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
dd = atmel_aes_dev_alloc(&ctx->base);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
crypto_aead_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
|
||||
ctx->base.dd = dd;
|
||||
ctx->base.dd->ctx = &ctx->base;
|
||||
ctx->base.start = atmel_aes_gcm_start;
|
||||
|
||||
return 0;
|
||||
@ -1819,12 +1859,8 @@ static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
|
||||
* the order of the ciphered tweak bytes need to be reversed before
|
||||
* writing them into the ODATARx registers.
|
||||
*/
|
||||
for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
|
||||
u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
|
||||
|
||||
tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
|
||||
tweak_bytes[i] = tmp;
|
||||
}
|
||||
for (i = 0; i < AES_BLOCK_SIZE/2; ++i)
|
||||
swap(tweak_bytes[i], tweak_bytes[AES_BLOCK_SIZE - 1 - i]);
|
||||
|
||||
/* Process the data. */
|
||||
atmel_aes_write_ctrl(dd, use_dma, NULL);
|
||||
@ -1849,6 +1885,13 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(ctx->base.key, key, keylen/2);
|
||||
memcpy(ctx->key2, key + keylen/2, keylen/2);
|
||||
ctx->base.keylen = keylen/2;
|
||||
@ -1869,18 +1912,40 @@ static int atmel_aes_xts_decrypt(struct skcipher_request *req)
|
||||
static int atmel_aes_xts_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct atmel_aes_dev *dd;
|
||||
const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
|
||||
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx));
|
||||
dd = atmel_aes_dev_alloc(&ctx->base);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
ctx->fallback_tfm = crypto_alloc_skcipher(tfm_name, 0,
|
||||
CRYPTO_ALG_NEED_FALLBACK);
|
||||
if (IS_ERR(ctx->fallback_tfm))
|
||||
return PTR_ERR(ctx->fallback_tfm);
|
||||
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_aes_reqctx) +
|
||||
crypto_skcipher_reqsize(ctx->fallback_tfm));
|
||||
ctx->base.dd = dd;
|
||||
ctx->base.dd->ctx = &ctx->base;
|
||||
ctx->base.start = atmel_aes_xts_start;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void atmel_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(ctx->fallback_tfm);
|
||||
}
|
||||
|
||||
static struct skcipher_alg aes_xts_alg = {
|
||||
.base.cra_name = "xts(aes)",
|
||||
.base.cra_driver_name = "atmel-xts-aes",
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
|
||||
.base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
|
||||
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
@ -1889,6 +1954,7 @@ static struct skcipher_alg aes_xts_alg = {
|
||||
.encrypt = atmel_aes_xts_encrypt,
|
||||
.decrypt = atmel_aes_xts_decrypt,
|
||||
.init = atmel_aes_xts_init_tfm,
|
||||
.exit = atmel_aes_xts_exit_tfm,
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC)
|
||||
@ -2075,6 +2141,11 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
|
||||
{
|
||||
struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize();
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
dd = atmel_aes_dev_alloc(&ctx->base);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
ctx->auth = atmel_sha_authenc_spawn(auth_mode);
|
||||
if (IS_ERR(ctx->auth))
|
||||
@ -2082,6 +2153,8 @@ static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm,
|
||||
|
||||
crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) +
|
||||
auth_reqsize));
|
||||
ctx->base.dd = dd;
|
||||
ctx->base.dd->ctx = &ctx->base;
|
||||
ctx->base.start = atmel_aes_authenc_start;
|
||||
|
||||
return 0;
|
||||
@ -2127,7 +2200,6 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
|
||||
struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
u32 authsize = crypto_aead_authsize(tfm);
|
||||
bool enc = (mode & AES_FLAGS_ENCRYPT);
|
||||
struct atmel_aes_dev *dd;
|
||||
|
||||
/* Compute text length. */
|
||||
if (!enc && req->cryptlen < authsize)
|
||||
@ -2146,11 +2218,7 @@ static int atmel_aes_authenc_crypt(struct aead_request *req,
|
||||
ctx->block_size = AES_BLOCK_SIZE;
|
||||
ctx->is_aead = true;
|
||||
|
||||
dd = atmel_aes_find_dev(ctx);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
return atmel_aes_handle_queue(dd, &req->base);
|
||||
return atmel_aes_handle_queue(ctx->dd, &req->base);
|
||||
}
|
||||
|
||||
static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req)
|
||||
@ -2358,7 +2426,7 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
|
||||
|
||||
static void atmel_aes_crypto_alg_init(struct crypto_alg *alg)
|
||||
{
|
||||
alg->cra_flags = CRYPTO_ALG_ASYNC;
|
||||
alg->cra_flags |= CRYPTO_ALG_ASYNC;
|
||||
alg->cra_alignmask = 0xf;
|
||||
alg->cra_priority = ATMEL_AES_PRIORITY;
|
||||
alg->cra_module = THIS_MODULE;
|
||||
|
@ -196,23 +196,15 @@ static void atmel_tdes_write_n(struct atmel_tdes_dev *dd, u32 offset,
|
||||
atmel_tdes_write(dd, offset, *value);
|
||||
}
|
||||
|
||||
static struct atmel_tdes_dev *atmel_tdes_find_dev(struct atmel_tdes_ctx *ctx)
|
||||
static struct atmel_tdes_dev *atmel_tdes_dev_alloc(void)
|
||||
{
|
||||
struct atmel_tdes_dev *tdes_dd = NULL;
|
||||
struct atmel_tdes_dev *tmp;
|
||||
struct atmel_tdes_dev *tdes_dd;
|
||||
|
||||
spin_lock_bh(&atmel_tdes.lock);
|
||||
if (!ctx->dd) {
|
||||
list_for_each_entry(tmp, &atmel_tdes.dev_list, list) {
|
||||
tdes_dd = tmp;
|
||||
break;
|
||||
}
|
||||
ctx->dd = tdes_dd;
|
||||
} else {
|
||||
tdes_dd = ctx->dd;
|
||||
}
|
||||
/* One TDES IP per SoC. */
|
||||
tdes_dd = list_first_entry_or_null(&atmel_tdes.dev_list,
|
||||
struct atmel_tdes_dev, list);
|
||||
spin_unlock_bh(&atmel_tdes.lock);
|
||||
|
||||
return tdes_dd;
|
||||
}
|
||||
|
||||
@ -320,7 +312,7 @@ static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd)
|
||||
dd->buf_out, dd->buflen, dd->dma_size, 1);
|
||||
if (count != dd->dma_size) {
|
||||
err = -EINVAL;
|
||||
pr_err("not all data converted: %zu\n", count);
|
||||
dev_dbg(dd->dev, "not all data converted: %zu\n", count);
|
||||
}
|
||||
}
|
||||
|
||||
@ -337,24 +329,24 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
|
||||
dd->buflen &= ~(DES_BLOCK_SIZE - 1);
|
||||
|
||||
if (!dd->buf_in || !dd->buf_out) {
|
||||
dev_err(dd->dev, "unable to alloc pages.\n");
|
||||
dev_dbg(dd->dev, "unable to alloc pages.\n");
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
/* MAP here */
|
||||
dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
|
||||
dd->buflen, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
|
||||
dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
|
||||
err = -EINVAL;
|
||||
err = dma_mapping_error(dd->dev, dd->dma_addr_in);
|
||||
if (err) {
|
||||
dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
|
||||
goto err_map_in;
|
||||
}
|
||||
|
||||
dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
|
||||
dd->buflen, DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
|
||||
dev_err(dd->dev, "dma %zd bytes error\n", dd->buflen);
|
||||
err = -EINVAL;
|
||||
err = dma_mapping_error(dd->dev, dd->dma_addr_out);
|
||||
if (err) {
|
||||
dev_dbg(dd->dev, "dma %zd bytes error\n", dd->buflen);
|
||||
goto err_map_out;
|
||||
}
|
||||
|
||||
@ -367,8 +359,6 @@ static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
|
||||
err_alloc:
|
||||
free_page((unsigned long)dd->buf_out);
|
||||
free_page((unsigned long)dd->buf_in);
|
||||
if (err)
|
||||
pr_err("error: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -520,14 +510,14 @@ static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
|
||||
|
||||
err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
|
||||
if (!err) {
|
||||
dev_err(dd->dev, "dma_map_sg() error\n");
|
||||
dev_dbg(dd->dev, "dma_map_sg() error\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = dma_map_sg(dd->dev, dd->out_sg, 1,
|
||||
DMA_FROM_DEVICE);
|
||||
if (!err) {
|
||||
dev_err(dd->dev, "dma_map_sg() error\n");
|
||||
dev_dbg(dd->dev, "dma_map_sg() error\n");
|
||||
dma_unmap_sg(dd->dev, dd->in_sg, 1,
|
||||
DMA_TO_DEVICE);
|
||||
return -EINVAL;
|
||||
@ -646,7 +636,6 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
|
||||
rctx->mode &= TDES_FLAGS_MODE_MASK;
|
||||
dd->flags = (dd->flags & ~TDES_FLAGS_MODE_MASK) | rctx->mode;
|
||||
dd->ctx = ctx;
|
||||
ctx->dd = dd;
|
||||
|
||||
err = atmel_tdes_write_ctrl(dd);
|
||||
if (!err)
|
||||
@ -679,7 +668,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
|
||||
dd->buf_out, dd->buflen, dd->dma_size, 1);
|
||||
if (count != dd->dma_size) {
|
||||
err = -EINVAL;
|
||||
pr_err("not all data converted: %zu\n", count);
|
||||
dev_dbg(dd->dev, "not all data converted: %zu\n", count);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -691,11 +680,15 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
|
||||
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher);
|
||||
struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req);
|
||||
struct device *dev = ctx->dd->dev;
|
||||
|
||||
if (!req->cryptlen)
|
||||
return 0;
|
||||
|
||||
switch (mode & TDES_FLAGS_OPMODE_MASK) {
|
||||
case TDES_FLAGS_CFB8:
|
||||
if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) {
|
||||
pr_err("request size is not exact amount of CFB8 blocks\n");
|
||||
dev_dbg(dev, "request size is not exact amount of CFB8 blocks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->block_size = CFB8_BLOCK_SIZE;
|
||||
@ -703,7 +696,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
|
||||
case TDES_FLAGS_CFB16:
|
||||
if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) {
|
||||
pr_err("request size is not exact amount of CFB16 blocks\n");
|
||||
dev_dbg(dev, "request size is not exact amount of CFB16 blocks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->block_size = CFB16_BLOCK_SIZE;
|
||||
@ -711,7 +704,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
|
||||
case TDES_FLAGS_CFB32:
|
||||
if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) {
|
||||
pr_err("request size is not exact amount of CFB32 blocks\n");
|
||||
dev_dbg(dev, "request size is not exact amount of CFB32 blocks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->block_size = CFB32_BLOCK_SIZE;
|
||||
@ -719,7 +712,7 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode)
|
||||
|
||||
default:
|
||||
if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) {
|
||||
pr_err("request size is not exact amount of DES blocks\n");
|
||||
dev_dbg(dev, "request size is not exact amount of DES blocks\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->block_size = DES_BLOCK_SIZE;
|
||||
@ -897,14 +890,13 @@ static int atmel_tdes_ofb_decrypt(struct skcipher_request *req)
|
||||
static int atmel_tdes_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct atmel_tdes_dev *dd;
|
||||
|
||||
ctx->dd = atmel_tdes_dev_alloc();
|
||||
if (!ctx->dd)
|
||||
return -ENODEV;
|
||||
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct atmel_tdes_reqctx));
|
||||
|
||||
dd = atmel_tdes_find_dev(ctx);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -999,7 +991,7 @@ static struct skcipher_alg tdes_algs[] = {
|
||||
{
|
||||
.base.cra_name = "ofb(des)",
|
||||
.base.cra_driver_name = "atmel-ofb-des",
|
||||
.base.cra_blocksize = DES_BLOCK_SIZE,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_alignmask = 0x7,
|
||||
|
||||
.min_keysize = DES_KEY_SIZE,
|
||||
|
@ -300,6 +300,9 @@ static int __sev_platform_shutdown_locked(int *error)
|
||||
struct sev_device *sev = psp_master->sev_data;
|
||||
int ret;
|
||||
|
||||
if (sev->state == SEV_STATE_UNINIT)
|
||||
return 0;
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -1019,6 +1022,20 @@ int sev_dev_init(struct psp_device *psp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sev_firmware_shutdown(struct sev_device *sev)
|
||||
{
|
||||
sev_platform_shutdown(NULL);
|
||||
|
||||
if (sev_es_tmr) {
|
||||
/* The TMR area was encrypted, flush it from the cache */
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
free_pages((unsigned long)sev_es_tmr,
|
||||
get_order(SEV_ES_TMR_SIZE));
|
||||
sev_es_tmr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void sev_dev_destroy(struct psp_device *psp)
|
||||
{
|
||||
struct sev_device *sev = psp->sev_data;
|
||||
@ -1026,6 +1043,8 @@ void sev_dev_destroy(struct psp_device *psp)
|
||||
if (!sev)
|
||||
return;
|
||||
|
||||
sev_firmware_shutdown(sev);
|
||||
|
||||
if (sev->misc)
|
||||
kref_put(&misc_dev->refcount, sev_exit);
|
||||
|
||||
@ -1056,21 +1075,6 @@ void sev_pci_init(void)
|
||||
if (sev_get_api_version())
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* If platform is not in UNINIT state then firmware upgrade and/or
|
||||
* platform INIT command will fail. These command require UNINIT state.
|
||||
*
|
||||
* In a normal boot we should never run into case where the firmware
|
||||
* is not in UNINIT state on boot. But in case of kexec boot, a reboot
|
||||
* may not go through a typical shutdown sequence and may leave the
|
||||
* firmware in INIT or WORKING state.
|
||||
*/
|
||||
|
||||
if (sev->state != SEV_STATE_UNINIT) {
|
||||
sev_platform_shutdown(NULL);
|
||||
sev->state = SEV_STATE_UNINIT;
|
||||
}
|
||||
|
||||
if (sev_version_greater_or_equal(0, 15) &&
|
||||
sev_update_firmware(sev->dev) == 0)
|
||||
sev_get_api_version();
|
||||
@ -1115,17 +1119,10 @@ void sev_pci_init(void)
|
||||
|
||||
void sev_pci_exit(void)
|
||||
{
|
||||
if (!psp_master->sev_data)
|
||||
struct sev_device *sev = psp_master->sev_data;
|
||||
|
||||
if (!sev)
|
||||
return;
|
||||
|
||||
sev_platform_shutdown(NULL);
|
||||
|
||||
if (sev_es_tmr) {
|
||||
/* The TMR area was encrypted, flush it from the cache */
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
free_pages((unsigned long)sev_es_tmr,
|
||||
get_order(SEV_ES_TMR_SIZE));
|
||||
sev_es_tmr = NULL;
|
||||
}
|
||||
sev_firmware_shutdown(sev);
|
||||
}
|
||||
|
@ -241,6 +241,17 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void sp_pci_shutdown(struct pci_dev *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct sp_device *sp = dev_get_drvdata(dev);
|
||||
|
||||
if (!sp)
|
||||
return;
|
||||
|
||||
sp_destroy(sp);
|
||||
}
|
||||
|
||||
static void sp_pci_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
@ -349,6 +360,12 @@ static const struct sp_dev_vdata dev_vdata[] = {
|
||||
#endif
|
||||
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
|
||||
.psp_vdata = &pspv3,
|
||||
#endif
|
||||
},
|
||||
{ /* 5 */
|
||||
.bar = 2,
|
||||
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
|
||||
.psp_vdata = &pspv2,
|
||||
#endif
|
||||
},
|
||||
};
|
||||
@ -359,6 +376,7 @@ static const struct pci_device_id sp_pci_table[] = {
|
||||
{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
|
||||
{ PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] },
|
||||
{ PCI_VDEVICE(AMD, 0x1649), (kernel_ulong_t)&dev_vdata[4] },
|
||||
{ PCI_VDEVICE(AMD, 0x14CA), (kernel_ulong_t)&dev_vdata[5] },
|
||||
/* Last entry must be zero */
|
||||
{ 0, }
|
||||
};
|
||||
@ -371,6 +389,7 @@ static struct pci_driver sp_pci_driver = {
|
||||
.id_table = sp_pci_table,
|
||||
.probe = sp_pci_probe,
|
||||
.remove = sp_pci_remove,
|
||||
.shutdown = sp_pci_shutdown,
|
||||
.driver.pm = &sp_pci_pm_ops,
|
||||
};
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/uacce.h>
|
||||
#include "hpre.h"
|
||||
@ -81,6 +82,16 @@
|
||||
#define HPRE_PREFETCH_DISABLE BIT(30)
|
||||
#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8))
|
||||
|
||||
/* clock gate */
|
||||
#define HPRE_CLKGATE_CTL 0x301a10
|
||||
#define HPRE_PEH_CFG_AUTO_GATE 0x301a2c
|
||||
#define HPRE_CLUSTER_DYN_CTL 0x302010
|
||||
#define HPRE_CORE_SHB_CFG 0x302088
|
||||
#define HPRE_CLKGATE_CTL_EN BIT(0)
|
||||
#define HPRE_PEH_CFG_AUTO_GATE_EN BIT(0)
|
||||
#define HPRE_CLUSTER_DYN_CTL_EN BIT(0)
|
||||
#define HPRE_CORE_GATE_EN (BIT(30) | BIT(31))
|
||||
|
||||
#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
|
||||
#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
|
||||
#define HPRE_WR_MSI_PORT BIT(2)
|
||||
@ -417,12 +428,63 @@ static void hpre_close_sva_prefetch(struct hisi_qm *qm)
|
||||
pci_err(qm->pdev, "failed to close sva prefetch\n");
|
||||
}
|
||||
|
||||
static void hpre_enable_clock_gate(struct hisi_qm *qm)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if (qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
val = readl(qm->io_base + HPRE_CLKGATE_CTL);
|
||||
val |= HPRE_CLKGATE_CTL_EN;
|
||||
writel(val, qm->io_base + HPRE_CLKGATE_CTL);
|
||||
|
||||
val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
|
||||
val |= HPRE_PEH_CFG_AUTO_GATE_EN;
|
||||
writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
|
||||
|
||||
val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
|
||||
val |= HPRE_CLUSTER_DYN_CTL_EN;
|
||||
writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
|
||||
|
||||
val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
|
||||
val |= HPRE_CORE_GATE_EN;
|
||||
writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
|
||||
}
|
||||
|
||||
static void hpre_disable_clock_gate(struct hisi_qm *qm)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if (qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
val = readl(qm->io_base + HPRE_CLKGATE_CTL);
|
||||
val &= ~HPRE_CLKGATE_CTL_EN;
|
||||
writel(val, qm->io_base + HPRE_CLKGATE_CTL);
|
||||
|
||||
val = readl(qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
|
||||
val &= ~HPRE_PEH_CFG_AUTO_GATE_EN;
|
||||
writel(val, qm->io_base + HPRE_PEH_CFG_AUTO_GATE);
|
||||
|
||||
val = readl(qm->io_base + HPRE_CLUSTER_DYN_CTL);
|
||||
val &= ~HPRE_CLUSTER_DYN_CTL_EN;
|
||||
writel(val, qm->io_base + HPRE_CLUSTER_DYN_CTL);
|
||||
|
||||
val = readl_relaxed(qm->io_base + HPRE_CORE_SHB_CFG);
|
||||
val &= ~HPRE_CORE_GATE_EN;
|
||||
writel(val, qm->io_base + HPRE_CORE_SHB_CFG);
|
||||
}
|
||||
|
||||
static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
u32 val;
|
||||
int ret;
|
||||
|
||||
/* disable dynamic clock gate before sram init */
|
||||
hpre_disable_clock_gate(qm);
|
||||
|
||||
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
|
||||
writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
|
||||
writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
|
||||
@ -473,6 +535,8 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
|
||||
/* Config data buffer pasid needed by Kunpeng 920 */
|
||||
hpre_config_pasid(qm);
|
||||
|
||||
hpre_enable_clock_gate(qm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -595,10 +659,15 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
struct hpre_debugfs_file *file = filp->private_data;
|
||||
struct hisi_qm *qm = hpre_file_to_qm(file);
|
||||
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
|
||||
u32 val;
|
||||
int ret;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
switch (file->type) {
|
||||
case HPRE_CLEAR_ENABLE:
|
||||
@ -608,18 +677,25 @@ static ssize_t hpre_ctrl_debug_read(struct file *filp, char __user *buf,
|
||||
val = hpre_cluster_inqry_read(file);
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&file->lock);
|
||||
return -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
spin_unlock_irq(&file->lock);
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
ret = snprintf(tbuf, HPRE_DBGFS_VAL_MAX_LEN, "%u\n", val);
|
||||
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
struct hpre_debugfs_file *file = filp->private_data;
|
||||
struct hisi_qm *qm = hpre_file_to_qm(file);
|
||||
char tbuf[HPRE_DBGFS_VAL_MAX_LEN];
|
||||
unsigned long val;
|
||||
int len, ret;
|
||||
@ -639,6 +715,10 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
|
||||
if (kstrtoul(tbuf, 0, &val))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
switch (file->type) {
|
||||
case HPRE_CLEAR_ENABLE:
|
||||
@ -655,12 +735,12 @@ static ssize_t hpre_ctrl_debug_write(struct file *filp, const char __user *buf,
|
||||
ret = -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
spin_unlock_irq(&file->lock);
|
||||
|
||||
return count;
|
||||
ret = count;
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -700,6 +780,24 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
|
||||
hpre_debugfs_atomic64_set, "%llu\n");
|
||||
|
||||
static int hpre_com_regs_show(struct seq_file *s, void *unused)
|
||||
{
|
||||
hisi_qm_regs_dump(s, s->private);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
|
||||
|
||||
static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
|
||||
{
|
||||
hisi_qm_regs_dump(s, s->private);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
|
||||
|
||||
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
|
||||
enum hpre_ctrl_dbgfs_file type, int indx)
|
||||
{
|
||||
@ -737,8 +835,11 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
|
||||
regset->regs = hpre_com_dfx_regs;
|
||||
regset->nregs = ARRAY_SIZE(hpre_com_dfx_regs);
|
||||
regset->base = qm->io_base;
|
||||
regset->dev = dev;
|
||||
|
||||
debugfs_create_file("regs", 0444, qm->debug.debug_root,
|
||||
regset, &hpre_com_regs_fops);
|
||||
|
||||
debugfs_create_regset32("regs", 0444, qm->debug.debug_root, regset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -764,8 +865,10 @@ static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
|
||||
regset->regs = hpre_cluster_dfx_regs;
|
||||
regset->nregs = ARRAY_SIZE(hpre_cluster_dfx_regs);
|
||||
regset->base = qm->io_base + hpre_cluster_offsets[i];
|
||||
regset->dev = dev;
|
||||
|
||||
debugfs_create_regset32("regs", 0444, tmp_d, regset);
|
||||
debugfs_create_file("regs", 0444, tmp_d, regset,
|
||||
&hpre_cluster_regs_fops);
|
||||
ret = hpre_create_debugfs_file(qm, tmp_d, HPRE_CLUSTER_CTRL,
|
||||
i + HPRE_CLUSTER_CTRL);
|
||||
if (ret)
|
||||
@ -1017,6 +1120,8 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
goto err_with_alg_register;
|
||||
}
|
||||
|
||||
hisi_qm_pm_init(qm);
|
||||
|
||||
return 0;
|
||||
|
||||
err_with_alg_register:
|
||||
@ -1040,6 +1145,7 @@ static void hpre_remove(struct pci_dev *pdev)
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
int ret;
|
||||
|
||||
hisi_qm_pm_uninit(qm);
|
||||
hisi_qm_wait_task_finish(qm, &hpre_devices);
|
||||
hisi_qm_alg_unregister(qm, &hpre_devices);
|
||||
if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
|
||||
@ -1062,6 +1168,10 @@ static void hpre_remove(struct pci_dev *pdev)
|
||||
hisi_qm_uninit(qm);
|
||||
}
|
||||
|
||||
/*
 * Runtime PM callbacks for the HPRE PCI driver: suspend and resume are the
 * shared hisi_qm_suspend()/hisi_qm_resume() helpers; the third (idle) slot
 * of SET_RUNTIME_PM_OPS is left NULL.
 */
static const struct dev_pm_ops hpre_pm_ops = {
|
||||
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
|
||||
};
|
||||
|
||||
static const struct pci_error_handlers hpre_err_handler = {
|
||||
.error_detected = hisi_qm_dev_err_detected,
|
||||
.slot_reset = hisi_qm_dev_slot_reset,
|
||||
@ -1078,6 +1188,7 @@ static struct pci_driver hpre_pci_driver = {
|
||||
hisi_qm_sriov_configure : NULL,
|
||||
.err_handler = &hpre_err_handler,
|
||||
.shutdown = hisi_qm_dev_shutdown,
|
||||
.driver.pm = &hpre_pm_ops,
|
||||
};
|
||||
|
||||
static void hpre_register_debugfs(void)
|
||||
|
@ -4,12 +4,12 @@
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/aer.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/irqreturn.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uacce.h>
|
||||
@ -270,6 +270,8 @@
|
||||
#define QM_QOS_MAX_CIR_S 11
|
||||
#define QM_QOS_VAL_MAX_LEN 32
|
||||
|
||||
#define QM_AUTOSUSPEND_DELAY 3000
|
||||
|
||||
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
|
||||
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
|
||||
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
|
||||
@ -734,6 +736,34 @@ static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
|
||||
return QM_IRQ_NUM_VF_V3;
|
||||
}
|
||||
|
||||
static int qm_pm_get_sync(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
int ret;
|
||||
|
||||
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
|
||||
return 0;
|
||||
|
||||
ret = pm_runtime_resume_and_get(dev);
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "failed to get_sync(%d).\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void qm_pm_put_sync(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
|
||||
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
pm_runtime_mark_last_busy(dev);
|
||||
pm_runtime_put_autosuspend(dev);
|
||||
}
|
||||
|
||||
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
|
||||
{
|
||||
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
|
||||
@ -1173,16 +1203,13 @@ static struct hisi_qm *file_to_qm(struct debugfs_file *file)
|
||||
return container_of(debug, struct hisi_qm, debug);
|
||||
}
|
||||
|
||||
static u32 current_q_read(struct debugfs_file *file)
|
||||
static u32 current_q_read(struct hisi_qm *qm)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
|
||||
return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
|
||||
}
|
||||
|
||||
static int current_q_write(struct debugfs_file *file, u32 val)
|
||||
static int current_q_write(struct hisi_qm *qm, u32 val)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
u32 tmp;
|
||||
|
||||
if (val >= qm->debug.curr_qm_qp_num)
|
||||
@ -1199,18 +1226,14 @@ static int current_q_write(struct debugfs_file *file, u32 val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 clear_enable_read(struct debugfs_file *file)
|
||||
static u32 clear_enable_read(struct hisi_qm *qm)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
|
||||
return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
|
||||
}
|
||||
|
||||
/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
|
||||
static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
|
||||
static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
|
||||
if (rd_clr_ctrl > 1)
|
||||
return -EINVAL;
|
||||
|
||||
@ -1219,16 +1242,13 @@ static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 current_qm_read(struct debugfs_file *file)
|
||||
static u32 current_qm_read(struct hisi_qm *qm)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
|
||||
return readl(qm->io_base + QM_DFX_MB_CNT_VF);
|
||||
}
|
||||
|
||||
static int current_qm_write(struct debugfs_file *file, u32 val)
|
||||
static int current_qm_write(struct hisi_qm *qm, u32 val)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
u32 tmp;
|
||||
|
||||
if (val > qm->vfs_num)
|
||||
@ -1259,29 +1279,39 @@ static ssize_t qm_debug_read(struct file *filp, char __user *buf,
|
||||
{
|
||||
struct debugfs_file *file = filp->private_data;
|
||||
enum qm_debug_file index = file->index;
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
char tbuf[QM_DBG_TMP_BUF_LEN];
|
||||
u32 val;
|
||||
int ret;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&file->lock);
|
||||
switch (index) {
|
||||
case CURRENT_QM:
|
||||
val = current_qm_read(file);
|
||||
val = current_qm_read(qm);
|
||||
break;
|
||||
case CURRENT_Q:
|
||||
val = current_q_read(file);
|
||||
val = current_q_read(qm);
|
||||
break;
|
||||
case CLEAR_ENABLE:
|
||||
val = clear_enable_read(file);
|
||||
val = clear_enable_read(qm);
|
||||
break;
|
||||
default:
|
||||
mutex_unlock(&file->lock);
|
||||
return -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
mutex_unlock(&file->lock);
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
|
||||
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
|
||||
|
||||
err_input:
|
||||
mutex_unlock(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
|
||||
@ -1289,6 +1319,7 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
|
||||
{
|
||||
struct debugfs_file *file = filp->private_data;
|
||||
enum qm_debug_file index = file->index;
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
unsigned long val;
|
||||
char tbuf[QM_DBG_TMP_BUF_LEN];
|
||||
int len, ret;
|
||||
@ -1308,22 +1339,28 @@ static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
|
||||
if (kstrtoul(tbuf, 0, &val))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&file->lock);
|
||||
switch (index) {
|
||||
case CURRENT_QM:
|
||||
ret = current_qm_write(file, val);
|
||||
ret = current_qm_write(qm, val);
|
||||
break;
|
||||
case CURRENT_Q:
|
||||
ret = current_q_write(file, val);
|
||||
ret = current_q_write(qm, val);
|
||||
break;
|
||||
case CLEAR_ENABLE:
|
||||
ret = clear_enable_write(file, val);
|
||||
ret = clear_enable_write(qm, val);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
mutex_unlock(&file->lock);
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1337,13 +1374,8 @@ static const struct file_operations qm_debug_fops = {
|
||||
.write = qm_debug_write,
|
||||
};
|
||||
|
||||
struct qm_dfx_registers {
|
||||
char *reg_name;
|
||||
u64 reg_offset;
|
||||
};
|
||||
|
||||
#define CNT_CYC_REGS_NUM 10
|
||||
static struct qm_dfx_registers qm_dfx_regs[] = {
|
||||
static const struct debugfs_reg32 qm_dfx_regs[] = {
|
||||
/* XXX_CNT are reading clear register */
|
||||
{"QM_ECC_1BIT_CNT ", 0x104000ull},
|
||||
{"QM_ECC_MBIT_CNT ", 0x104008ull},
|
||||
@ -1369,31 +1401,59 @@ static struct qm_dfx_registers qm_dfx_regs[] = {
|
||||
{"QM_DFX_FF_ST5 ", 0x1040dcull},
|
||||
{"QM_DFX_FF_ST6 ", 0x1040e0ull},
|
||||
{"QM_IN_IDLE_ST ", 0x1040e4ull},
|
||||
{ NULL, 0}
|
||||
};
|
||||
|
||||
static struct qm_dfx_registers qm_vf_dfx_regs[] = {
|
||||
static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
|
||||
{"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
|
||||
{ NULL, 0}
|
||||
};
|
||||
|
||||
/**
|
||||
* hisi_qm_regs_dump() - Dump registers's value.
|
||||
* @s: debugfs file handle.
|
||||
* @regset: accelerator registers information.
|
||||
*
|
||||
* Dump accelerator registers.
|
||||
*/
|
||||
void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(regset->dev);
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
const struct debugfs_reg32 *regs = regset->regs;
|
||||
int regs_len = regset->nregs;
|
||||
int i, ret;
|
||||
u32 val;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
for (i = 0; i < regs_len; i++) {
|
||||
val = readl(regset->base + regs[i].offset);
|
||||
seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
|
||||
}
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
|
||||
|
||||
static int qm_regs_show(struct seq_file *s, void *unused)
|
||||
{
|
||||
struct hisi_qm *qm = s->private;
|
||||
struct qm_dfx_registers *regs;
|
||||
u32 val;
|
||||
struct debugfs_regset32 regset;
|
||||
|
||||
if (qm->fun_type == QM_HW_PF)
|
||||
regs = qm_dfx_regs;
|
||||
else
|
||||
regs = qm_vf_dfx_regs;
|
||||
|
||||
while (regs->reg_name) {
|
||||
val = readl(qm->io_base + regs->reg_offset);
|
||||
seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val);
|
||||
regs++;
|
||||
if (qm->fun_type == QM_HW_PF) {
|
||||
regset.regs = qm_dfx_regs;
|
||||
regset.nregs = ARRAY_SIZE(qm_dfx_regs);
|
||||
} else {
|
||||
regset.regs = qm_vf_dfx_regs;
|
||||
regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
|
||||
}
|
||||
|
||||
regset.base = qm->io_base;
|
||||
regset.dev = &qm->pdev->dev;
|
||||
|
||||
hisi_qm_regs_dump(s, &regset);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1823,16 +1883,24 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
|
||||
if (*pos)
|
||||
return 0;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Judge if the instance is being reset. */
|
||||
if (unlikely(atomic_read(&qm->status.flags) == QM_STOP))
|
||||
return 0;
|
||||
|
||||
if (count > QM_DBG_WRITE_LEN)
|
||||
return -ENOSPC;
|
||||
if (count > QM_DBG_WRITE_LEN) {
|
||||
ret = -ENOSPC;
|
||||
goto put_dfx_access;
|
||||
}
|
||||
|
||||
cmd_buf = memdup_user_nul(buffer, count);
|
||||
if (IS_ERR(cmd_buf))
|
||||
return PTR_ERR(cmd_buf);
|
||||
if (IS_ERR(cmd_buf)) {
|
||||
ret = PTR_ERR(cmd_buf);
|
||||
goto put_dfx_access;
|
||||
}
|
||||
|
||||
cmd_buf_tmp = strchr(cmd_buf, '\n');
|
||||
if (cmd_buf_tmp) {
|
||||
@ -1843,12 +1911,16 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
|
||||
ret = qm_cmd_write_dump(qm, cmd_buf);
|
||||
if (ret) {
|
||||
kfree(cmd_buf);
|
||||
return ret;
|
||||
goto put_dfx_access;
|
||||
}
|
||||
|
||||
kfree(cmd_buf);
|
||||
|
||||
return count;
|
||||
ret = count;
|
||||
|
||||
put_dfx_access:
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations qm_cmd_fops = {
|
||||
@ -2445,11 +2517,19 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
|
||||
/*
 * Create a queue pair under qps_lock, holding a runtime PM reference for it.
 * If the PM reference cannot be taken, that error is returned as ERR_PTR.
 * If creation fails, the PM reference is dropped again and the ERR_PTR from
 * qm_create_qp_nolock() is returned.
 */
struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
	struct hisi_qp *new_qp;
	int rc = qm_pm_get_sync(qm);

	if (rc)
		return ERR_PTR(rc);

	down_write(&qm->qps_lock);
	new_qp = qm_create_qp_nolock(qm, alg_type);
	up_write(&qm->qps_lock);

	if (!IS_ERR(new_qp))
		return new_qp;

	/* creation failed: give back the reference taken above */
	qm_pm_put_sync(qm);
	return new_qp;
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
|
||||
@ -2475,6 +2555,8 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
|
||||
idr_remove(&qm->qp_idr, qp->qp_id);
|
||||
|
||||
up_write(&qm->qps_lock);
|
||||
|
||||
qm_pm_put_sync(qm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
|
||||
|
||||
@ -3200,6 +3282,10 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
|
||||
init_rwsem(&qm->qps_lock);
|
||||
qm->qp_in_used = 0;
|
||||
qm->misc_ctl = false;
|
||||
if (qm->fun_type == QM_HW_PF && qm->ver > QM_HW_V2) {
|
||||
if (!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev)))
|
||||
dev_info(&pdev->dev, "_PS0 and _PR0 are not defined");
|
||||
}
|
||||
}
|
||||
|
||||
static void qm_cmd_uninit(struct hisi_qm *qm)
|
||||
@ -4057,10 +4143,15 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
|
||||
u32 qos_val, ir;
|
||||
int ret;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Mailbox and reset cannot be operated at the same time */
|
||||
if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
|
||||
pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
|
||||
return -EAGAIN;
|
||||
ret = -EAGAIN;
|
||||
goto err_put_dfx_access;
|
||||
}
|
||||
|
||||
if (qm->fun_type == QM_HW_PF) {
|
||||
@ -4079,6 +4170,8 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
|
||||
|
||||
err_get_status:
|
||||
clear_bit(QM_RESETTING, &qm->misc_ctl);
|
||||
err_put_dfx_access:
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4159,15 +4252,23 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
|
||||
|
||||
fun_index = device * 8 + function;
|
||||
|
||||
ret = qm_func_shaper_enable(qm, fun_index, val);
|
||||
ret = qm_pm_get_sync(qm);
|
||||
if (ret) {
|
||||
pci_err(qm->pdev, "failed to enable function shaper!\n");
|
||||
ret = -EINVAL;
|
||||
goto err_get_status;
|
||||
}
|
||||
|
||||
ret = count;
|
||||
ret = qm_func_shaper_enable(qm, fun_index, val);
|
||||
if (ret) {
|
||||
pci_err(qm->pdev, "failed to enable function shaper!\n");
|
||||
ret = -EINVAL;
|
||||
goto err_put_sync;
|
||||
}
|
||||
|
||||
ret = count;
|
||||
|
||||
err_put_sync:
|
||||
qm_pm_put_sync(qm);
|
||||
err_get_status:
|
||||
clear_bit(QM_RESETTING, &qm->misc_ctl);
|
||||
return ret;
|
||||
@ -4245,7 +4346,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
|
||||
*/
|
||||
void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
|
||||
{
|
||||
struct qm_dfx_registers *regs;
|
||||
const struct debugfs_reg32 *regs;
|
||||
int i;
|
||||
|
||||
/* clear current_qm */
|
||||
@ -4264,7 +4365,7 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
|
||||
|
||||
regs = qm_dfx_regs;
|
||||
for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
|
||||
readl(qm->io_base + regs->reg_offset);
|
||||
readl(qm->io_base + regs->offset);
|
||||
regs++;
|
||||
}
|
||||
|
||||
@ -4287,19 +4388,23 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
int pre_existing_vfs, num_vfs, total_vfs, ret;
|
||||
|
||||
ret = qm_pm_get_sync(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
total_vfs = pci_sriov_get_totalvfs(pdev);
|
||||
pre_existing_vfs = pci_num_vf(pdev);
|
||||
if (pre_existing_vfs) {
|
||||
pci_err(pdev, "%d VFs already enabled. Please disable pre-enabled VFs!\n",
|
||||
pre_existing_vfs);
|
||||
return 0;
|
||||
goto err_put_sync;
|
||||
}
|
||||
|
||||
num_vfs = min_t(int, max_vfs, total_vfs);
|
||||
ret = qm_vf_q_assign(qm, num_vfs);
|
||||
if (ret) {
|
||||
pci_err(pdev, "Can't assign queues for VF!\n");
|
||||
return ret;
|
||||
goto err_put_sync;
|
||||
}
|
||||
|
||||
qm->vfs_num = num_vfs;
|
||||
@ -4308,12 +4413,16 @@ int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs)
|
||||
if (ret) {
|
||||
pci_err(pdev, "Can't enable VF!\n");
|
||||
qm_clear_vft_config(qm);
|
||||
return ret;
|
||||
goto err_put_sync;
|
||||
}
|
||||
|
||||
pci_info(pdev, "VF enabled, vfs_num(=%d)!\n", num_vfs);
|
||||
|
||||
return num_vfs;
|
||||
|
||||
err_put_sync:
|
||||
qm_pm_put_sync(qm);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
|
||||
|
||||
@ -4328,6 +4437,7 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
|
||||
{
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
|
||||
int ret;
|
||||
|
||||
if (pci_vfs_assigned(pdev)) {
|
||||
pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
|
||||
@ -4343,8 +4453,13 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
|
||||
pci_disable_sriov(pdev);
|
||||
/* clear vf function shaper configure array */
|
||||
memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
|
||||
ret = qm_clear_vft_config(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return qm_clear_vft_config(qm);
|
||||
qm_pm_put_sync(qm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
|
||||
|
||||
@ -5164,11 +5279,18 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
|
||||
struct hisi_qm *qm = container_of(rst_work, struct hisi_qm, rst_work);
|
||||
int ret;
|
||||
|
||||
ret = qm_pm_get_sync(qm);
|
||||
if (ret) {
|
||||
clear_bit(QM_RST_SCHED, &qm->misc_ctl);
|
||||
return;
|
||||
}
|
||||
|
||||
/* reset pcie device controller */
|
||||
ret = qm_controller_reset(qm);
|
||||
if (ret)
|
||||
dev_err(&qm->pdev->dev, "controller reset failed (%d)\n", ret);
|
||||
|
||||
qm_pm_put_sync(qm);
|
||||
}
|
||||
|
||||
static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
|
||||
@ -5680,6 +5802,194 @@ int hisi_qm_init(struct hisi_qm *qm)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_init);
|
||||
|
||||
/**
|
||||
* hisi_qm_get_dfx_access() - Try to get dfx access.
|
||||
* @qm: pointer to accelerator device.
|
||||
*
|
||||
* Try to get dfx access, then user can get message.
|
||||
*
|
||||
* If device is in suspended, return failure, otherwise
|
||||
* bump up the runtime PM usage counter.
|
||||
*/
|
||||
int hisi_qm_get_dfx_access(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
|
||||
if (pm_runtime_suspended(dev)) {
|
||||
dev_info(dev, "can not read/write - device in suspended.\n");
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
return qm_pm_get_sync(qm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_get_dfx_access);
|
||||
|
||||
/**
 * hisi_qm_put_dfx_access() - Release dfx access.
 * @qm: pointer to accelerator device.
 *
 * Counterpart of hisi_qm_get_dfx_access(); forwards to qm_pm_put_sync().
 */
void hisi_qm_put_dfx_access(struct hisi_qm *qm)
{
	qm_pm_put_sync(qm);
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_put_dfx_access);
|
||||
|
||||
/**
|
||||
* hisi_qm_pm_init() - Initialize qm runtime PM.
|
||||
* @qm: pointer to accelerator device.
|
||||
*
|
||||
* Function that initialize qm runtime PM.
|
||||
*/
|
||||
void hisi_qm_pm_init(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
|
||||
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
pm_runtime_set_autosuspend_delay(dev, QM_AUTOSUSPEND_DELAY);
|
||||
pm_runtime_use_autosuspend(dev);
|
||||
pm_runtime_put_noidle(dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_pm_init);
|
||||
|
||||
/**
|
||||
* hisi_qm_pm_uninit() - Uninitialize qm runtime PM.
|
||||
* @qm: pointer to accelerator device.
|
||||
*
|
||||
* Function that uninitialize qm runtime PM.
|
||||
*/
|
||||
void hisi_qm_pm_uninit(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
|
||||
if (qm->fun_type == QM_HW_VF || qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
pm_runtime_get_noresume(dev);
|
||||
pm_runtime_dont_use_autosuspend(dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_pm_uninit);
|
||||
|
||||
static int qm_prepare_for_suspend(struct hisi_qm *qm)
|
||||
{
|
||||
struct pci_dev *pdev = qm->pdev;
|
||||
int ret;
|
||||
u32 val;
|
||||
|
||||
ret = qm->ops->set_msi(qm, false);
|
||||
if (ret) {
|
||||
pci_err(pdev, "failed to disable MSI before suspending!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* shutdown OOO register */
|
||||
writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN,
|
||||
qm->io_base + ACC_MASTER_GLOBAL_CTRL);
|
||||
|
||||
ret = readl_relaxed_poll_timeout(qm->io_base + ACC_MASTER_TRANS_RETURN,
|
||||
val,
|
||||
(val == ACC_MASTER_TRANS_RETURN_RW),
|
||||
POLL_PERIOD, POLL_TIMEOUT);
|
||||
if (ret) {
|
||||
pci_emerg(pdev, "Bus lock! Please reset system.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qm_set_pf_mse(qm, false);
|
||||
if (ret)
|
||||
pci_err(pdev, "failed to disable MSE before suspending!\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int qm_rebuild_for_resume(struct hisi_qm *qm)
|
||||
{
|
||||
struct pci_dev *pdev = qm->pdev;
|
||||
int ret;
|
||||
|
||||
ret = qm_set_pf_mse(qm, true);
|
||||
if (ret) {
|
||||
pci_err(pdev, "failed to enable MSE after resuming!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qm->ops->set_msi(qm, true);
|
||||
if (ret) {
|
||||
pci_err(pdev, "failed to enable MSI after resuming!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qm_dev_hw_init(qm);
|
||||
if (ret) {
|
||||
pci_err(pdev, "failed to init device after resuming\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
qm_cmd_init(qm);
|
||||
hisi_qm_dev_err_init(qm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* hisi_qm_suspend() - Runtime suspend of given device.
|
||||
* @dev: device to suspend.
|
||||
*
|
||||
* Function that suspend the device.
|
||||
*/
|
||||
int hisi_qm_suspend(struct device *dev)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
int ret;
|
||||
|
||||
pci_info(pdev, "entering suspended state\n");
|
||||
|
||||
ret = hisi_qm_stop(qm, QM_NORMAL);
|
||||
if (ret) {
|
||||
pci_err(pdev, "failed to stop qm(%d)\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = qm_prepare_for_suspend(qm);
|
||||
if (ret)
|
||||
pci_err(pdev, "failed to prepare suspended(%d)\n", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_suspend);
|
||||
|
||||
/**
 * hisi_qm_resume() - Runtime resume of given device.
 * @dev: device to resume.
 *
 * Rebuild the device state that was torn down for suspend, then restart the
 * qm.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
int hisi_qm_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct hisi_qm *qm = pci_get_drvdata(pdev);
	int ret;

	pci_info(pdev, "resuming from suspend state\n");

	ret = qm_rebuild_for_resume(qm);
	if (ret) {
		pci_err(pdev, "failed to rebuild resume(%d)\n", ret);
		return ret;
	}

	ret = hisi_qm_start(qm);
	if (ret)
		pci_err(pdev, "failed to start qm(%d)\n", ret);

	/*
	 * Propagate a start failure: returning 0 here would tell the PM core
	 * the device resumed successfully while the qm is not running.
	 */
	return ret;
}
|
||||
EXPORT_SYMBOL_GPL(hisi_qm_resume);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
|
||||
MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
|
||||
|
@ -4,6 +4,7 @@
|
||||
#define HISI_ACC_QM_H
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/iopoll.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
@ -430,4 +431,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev);
|
||||
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
|
||||
int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
|
||||
void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
|
||||
int hisi_qm_resume(struct device *dev);
|
||||
int hisi_qm_suspend(struct device *dev);
|
||||
void hisi_qm_pm_uninit(struct hisi_qm *qm);
|
||||
void hisi_qm_pm_init(struct hisi_qm *qm);
|
||||
int hisi_qm_get_dfx_access(struct hisi_qm *qm);
|
||||
void hisi_qm_put_dfx_access(struct hisi_qm *qm);
|
||||
void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
|
||||
#endif
|
||||
|
@ -157,11 +157,6 @@ struct sec_ctx {
|
||||
struct device *dev;
|
||||
};
|
||||
|
||||
enum sec_endian {
|
||||
SEC_LE = 0,
|
||||
SEC_32BE,
|
||||
SEC_64BE
|
||||
};
|
||||
|
||||
enum sec_debug_file_index {
|
||||
SEC_CLEAR_ENABLE,
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/uacce.h>
|
||||
@ -57,10 +58,16 @@
|
||||
#define SEC_MEM_START_INIT_REG 0x301100
|
||||
#define SEC_MEM_INIT_DONE_REG 0x301104
|
||||
|
||||
/* clock gating */
|
||||
#define SEC_CONTROL_REG 0x301200
|
||||
#define SEC_TRNG_EN_SHIFT 8
|
||||
#define SEC_DYNAMIC_GATE_REG 0x30121c
|
||||
#define SEC_CORE_AUTO_GATE 0x30212c
|
||||
#define SEC_DYNAMIC_GATE_EN 0x7bff
|
||||
#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0)
|
||||
#define SEC_CLK_GATE_ENABLE BIT(3)
|
||||
#define SEC_CLK_GATE_DISABLE (~BIT(3))
|
||||
|
||||
#define SEC_TRNG_EN_SHIFT 8
|
||||
#define SEC_AXI_SHUTDOWN_ENABLE BIT(12)
|
||||
#define SEC_AXI_SHUTDOWN_DISABLE 0xFFFFEFFF
|
||||
|
||||
@ -312,31 +319,20 @@ static const struct pci_device_id sec_dev_ids[] = {
|
||||
};
|
||||
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
|
||||
|
||||
static u8 sec_get_endian(struct hisi_qm *qm)
|
||||
static void sec_set_endian(struct hisi_qm *qm)
|
||||
{
|
||||
u32 reg;
|
||||
|
||||
/*
|
||||
* As for VF, it is a wrong way to get endian setting by
|
||||
* reading a register of the engine
|
||||
*/
|
||||
if (qm->pdev->is_virtfn) {
|
||||
dev_err_ratelimited(&qm->pdev->dev,
|
||||
"cannot access a register in VF!\n");
|
||||
return SEC_LE;
|
||||
}
|
||||
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
|
||||
/* BD little endian mode */
|
||||
if (!(reg & BIT(0)))
|
||||
return SEC_LE;
|
||||
reg &= ~(BIT(1) | BIT(0));
|
||||
if (!IS_ENABLED(CONFIG_64BIT))
|
||||
reg |= BIT(1);
|
||||
|
||||
/* BD 32-bits big endian mode */
|
||||
else if (!(reg & BIT(1)))
|
||||
return SEC_32BE;
|
||||
|
||||
/* BD 64-bits big endian mode */
|
||||
else
|
||||
return SEC_64BE;
|
||||
if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
|
||||
reg |= BIT(0);
|
||||
|
||||
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
|
||||
}
|
||||
|
||||
static void sec_open_sva_prefetch(struct hisi_qm *qm)
|
||||
@ -378,15 +374,43 @@ static void sec_close_sva_prefetch(struct hisi_qm *qm)
|
||||
pci_err(qm->pdev, "failed to close sva prefetch\n");
|
||||
}
|
||||
|
||||
static void sec_enable_clock_gate(struct hisi_qm *qm)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if (qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
|
||||
val |= SEC_CLK_GATE_ENABLE;
|
||||
writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
|
||||
|
||||
val = readl(qm->io_base + SEC_DYNAMIC_GATE_REG);
|
||||
val |= SEC_DYNAMIC_GATE_EN;
|
||||
writel(val, qm->io_base + SEC_DYNAMIC_GATE_REG);
|
||||
|
||||
val = readl(qm->io_base + SEC_CORE_AUTO_GATE);
|
||||
val |= SEC_CORE_AUTO_GATE_EN;
|
||||
writel(val, qm->io_base + SEC_CORE_AUTO_GATE);
|
||||
}
|
||||
|
||||
static void sec_disable_clock_gate(struct hisi_qm *qm)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
/* Kunpeng920 needs to close clock gating */
|
||||
val = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
|
||||
val &= SEC_CLK_GATE_DISABLE;
|
||||
writel_relaxed(val, qm->io_base + SEC_CONTROL_REG);
|
||||
}
|
||||
|
||||
static int sec_engine_init(struct hisi_qm *qm)
|
||||
{
|
||||
int ret;
|
||||
u32 reg;
|
||||
|
||||
/* disable clock gate control */
|
||||
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
|
||||
reg &= SEC_CLK_GATE_DISABLE;
|
||||
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
|
||||
/* disable clock gate control before mem init */
|
||||
sec_disable_clock_gate(qm);
|
||||
|
||||
writel_relaxed(0x1, qm->io_base + SEC_MEM_START_INIT_REG);
|
||||
|
||||
@ -429,9 +453,9 @@ static int sec_engine_init(struct hisi_qm *qm)
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
|
||||
|
||||
/* config endian */
|
||||
reg = readl_relaxed(qm->io_base + SEC_CONTROL_REG);
|
||||
reg |= sec_get_endian(qm);
|
||||
writel_relaxed(reg, qm->io_base + SEC_CONTROL_REG);
|
||||
sec_set_endian(qm);
|
||||
|
||||
sec_enable_clock_gate(qm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -533,17 +557,14 @@ static void sec_hw_error_disable(struct hisi_qm *qm)
|
||||
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
|
||||
}
|
||||
|
||||
static u32 sec_clear_enable_read(struct sec_debug_file *file)
|
||||
static u32 sec_clear_enable_read(struct hisi_qm *qm)
|
||||
{
|
||||
struct hisi_qm *qm = file->qm;
|
||||
|
||||
return readl(qm->io_base + SEC_CTRL_CNT_CLR_CE) &
|
||||
SEC_CTRL_CNT_CLR_CE_BIT;
|
||||
}
|
||||
|
||||
static int sec_clear_enable_write(struct sec_debug_file *file, u32 val)
|
||||
static int sec_clear_enable_write(struct hisi_qm *qm, u32 val)
|
||||
{
|
||||
struct hisi_qm *qm = file->qm;
|
||||
u32 tmp;
|
||||
|
||||
if (val != 1 && val)
|
||||
@ -561,24 +582,34 @@ static ssize_t sec_debug_read(struct file *filp, char __user *buf,
|
||||
{
|
||||
struct sec_debug_file *file = filp->private_data;
|
||||
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
|
||||
struct hisi_qm *qm = file->qm;
|
||||
u32 val;
|
||||
int ret;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
|
||||
switch (file->index) {
|
||||
case SEC_CLEAR_ENABLE:
|
||||
val = sec_clear_enable_read(file);
|
||||
val = sec_clear_enable_read(qm);
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&file->lock);
|
||||
return -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&file->lock);
|
||||
ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
ret = snprintf(tbuf, SEC_DBGFS_VAL_MAX_LEN, "%u\n", val);
|
||||
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
|
||||
@ -586,6 +617,7 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
|
||||
{
|
||||
struct sec_debug_file *file = filp->private_data;
|
||||
char tbuf[SEC_DBGFS_VAL_MAX_LEN];
|
||||
struct hisi_qm *qm = file->qm;
|
||||
unsigned long val;
|
||||
int len, ret;
|
||||
|
||||
@ -604,11 +636,15 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
|
||||
if (kstrtoul(tbuf, 0, &val))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
|
||||
switch (file->index) {
|
||||
case SEC_CLEAR_ENABLE:
|
||||
ret = sec_clear_enable_write(file, val);
|
||||
ret = sec_clear_enable_write(qm, val);
|
||||
if (ret)
|
||||
goto err_input;
|
||||
break;
|
||||
@ -617,12 +653,11 @@ static ssize_t sec_debug_write(struct file *filp, const char __user *buf,
|
||||
goto err_input;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&file->lock);
|
||||
|
||||
return count;
|
||||
ret = count;
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -653,6 +688,15 @@ static int sec_debugfs_atomic64_set(void *data, u64 val)
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get,
|
||||
sec_debugfs_atomic64_set, "%lld\n");
|
||||
|
||||
static int sec_regs_show(struct seq_file *s, void *unused)
|
||||
{
|
||||
hisi_qm_regs_dump(s, s->private);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(sec_regs);
|
||||
|
||||
static int sec_core_debug_init(struct hisi_qm *qm)
|
||||
{
|
||||
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
|
||||
@ -671,9 +715,10 @@ static int sec_core_debug_init(struct hisi_qm *qm)
|
||||
regset->regs = sec_dfx_regs;
|
||||
regset->nregs = ARRAY_SIZE(sec_dfx_regs);
|
||||
regset->base = qm->io_base;
|
||||
regset->dev = dev;
|
||||
|
||||
if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
|
||||
debugfs_create_regset32("regs", 0444, tmp_d, regset);
|
||||
debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
|
||||
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
|
||||
@ -981,10 +1026,13 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
goto err_alg_unregister;
|
||||
}
|
||||
|
||||
hisi_qm_pm_init(qm);
|
||||
|
||||
return 0;
|
||||
|
||||
err_alg_unregister:
|
||||
hisi_qm_alg_unregister(qm, &sec_devices);
|
||||
if (qm->qp_num >= ctx_q_num)
|
||||
hisi_qm_alg_unregister(qm, &sec_devices);
|
||||
err_qm_stop:
|
||||
sec_debugfs_exit(qm);
|
||||
hisi_qm_stop(qm, QM_NORMAL);
|
||||
@ -999,6 +1047,7 @@ static void sec_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
|
||||
hisi_qm_pm_uninit(qm);
|
||||
hisi_qm_wait_task_finish(qm, &sec_devices);
|
||||
if (qm->qp_num >= ctx_q_num)
|
||||
hisi_qm_alg_unregister(qm, &sec_devices);
|
||||
@ -1018,6 +1067,10 @@ static void sec_remove(struct pci_dev *pdev)
|
||||
sec_qm_uninit(qm);
|
||||
}
|
||||
|
||||
static const struct dev_pm_ops sec_pm_ops = {
|
||||
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
|
||||
};
|
||||
|
||||
static const struct pci_error_handlers sec_err_handler = {
|
||||
.error_detected = hisi_qm_dev_err_detected,
|
||||
.slot_reset = hisi_qm_dev_slot_reset,
|
||||
@ -1033,6 +1086,7 @@ static struct pci_driver sec_pci_driver = {
|
||||
.err_handler = &sec_err_handler,
|
||||
.sriov_configure = hisi_qm_sriov_configure,
|
||||
.shutdown = hisi_qm_dev_shutdown,
|
||||
.driver.pm = &sec_pm_ops,
|
||||
};
|
||||
|
||||
static void sec_register_debugfs(void)
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/uacce.h>
|
||||
@ -107,6 +108,14 @@
|
||||
#define HZIP_DELAY_1_US 1
|
||||
#define HZIP_POLL_TIMEOUT_US 1000
|
||||
|
||||
/* clock gating */
|
||||
#define HZIP_PEH_CFG_AUTO_GATE 0x3011A8
|
||||
#define HZIP_PEH_CFG_AUTO_GATE_EN BIT(0)
|
||||
#define HZIP_CORE_GATED_EN GENMASK(15, 8)
|
||||
#define HZIP_CORE_GATED_OOO_EN BIT(29)
|
||||
#define HZIP_CLOCK_GATED_EN (HZIP_CORE_GATED_EN | \
|
||||
HZIP_CORE_GATED_OOO_EN)
|
||||
|
||||
static const char hisi_zip_name[] = "hisi_zip";
|
||||
static struct dentry *hzip_debugfs_root;
|
||||
|
||||
@ -312,6 +321,22 @@ static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
|
||||
pci_err(qm->pdev, "failed to close sva prefetch\n");
|
||||
}
|
||||
|
||||
static void hisi_zip_enable_clock_gate(struct hisi_qm *qm)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if (qm->ver < QM_HW_V3)
|
||||
return;
|
||||
|
||||
val = readl(qm->io_base + HZIP_CLOCK_GATE_CTRL);
|
||||
val |= HZIP_CLOCK_GATED_EN;
|
||||
writel(val, qm->io_base + HZIP_CLOCK_GATE_CTRL);
|
||||
|
||||
val = readl(qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
|
||||
val |= HZIP_PEH_CFG_AUTO_GATE_EN;
|
||||
writel(val, qm->io_base + HZIP_PEH_CFG_AUTO_GATE);
|
||||
}
|
||||
|
||||
static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
|
||||
{
|
||||
void __iomem *base = qm->io_base;
|
||||
@ -359,6 +384,8 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
|
||||
CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
|
||||
FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
|
||||
|
||||
hisi_zip_enable_clock_gate(qm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -423,17 +450,14 @@ static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
|
||||
return &hisi_zip->qm;
|
||||
}
|
||||
|
||||
static u32 clear_enable_read(struct ctrl_debug_file *file)
|
||||
static u32 clear_enable_read(struct hisi_qm *qm)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
|
||||
return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
|
||||
HZIP_SOFT_CTRL_CNT_CLR_CE_BIT;
|
||||
}
|
||||
|
||||
static int clear_enable_write(struct ctrl_debug_file *file, u32 val)
|
||||
static int clear_enable_write(struct hisi_qm *qm, u32 val)
|
||||
{
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
u32 tmp;
|
||||
|
||||
if (val != 1 && val != 0)
|
||||
@ -450,22 +474,33 @@ static ssize_t hisi_zip_ctrl_debug_read(struct file *filp, char __user *buf,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
struct ctrl_debug_file *file = filp->private_data;
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
char tbuf[HZIP_BUF_SIZE];
|
||||
u32 val;
|
||||
int ret;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
switch (file->index) {
|
||||
case HZIP_CLEAR_ENABLE:
|
||||
val = clear_enable_read(file);
|
||||
val = clear_enable_read(qm);
|
||||
break;
|
||||
default:
|
||||
spin_unlock_irq(&file->lock);
|
||||
return -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
spin_unlock_irq(&file->lock);
|
||||
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
ret = scnprintf(tbuf, sizeof(tbuf), "%u\n", val);
|
||||
return simple_read_from_buffer(buf, count, pos, tbuf, ret);
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
|
||||
@ -473,6 +508,7 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
struct ctrl_debug_file *file = filp->private_data;
|
||||
struct hisi_qm *qm = file_to_qm(file);
|
||||
char tbuf[HZIP_BUF_SIZE];
|
||||
unsigned long val;
|
||||
int len, ret;
|
||||
@ -491,10 +527,14 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
|
||||
if (kstrtoul(tbuf, 0, &val))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hisi_qm_get_dfx_access(qm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(&file->lock);
|
||||
switch (file->index) {
|
||||
case HZIP_CLEAR_ENABLE:
|
||||
ret = clear_enable_write(file, val);
|
||||
ret = clear_enable_write(qm, val);
|
||||
if (ret)
|
||||
goto err_input;
|
||||
break;
|
||||
@ -502,12 +542,12 @@ static ssize_t hisi_zip_ctrl_debug_write(struct file *filp,
|
||||
ret = -EINVAL;
|
||||
goto err_input;
|
||||
}
|
||||
spin_unlock_irq(&file->lock);
|
||||
|
||||
return count;
|
||||
ret = count;
|
||||
|
||||
err_input:
|
||||
spin_unlock_irq(&file->lock);
|
||||
hisi_qm_put_dfx_access(qm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -538,6 +578,15 @@ static int zip_debugfs_atomic64_get(void *data, u64 *val)
|
||||
DEFINE_DEBUGFS_ATTRIBUTE(zip_atomic64_ops, zip_debugfs_atomic64_get,
|
||||
zip_debugfs_atomic64_set, "%llu\n");
|
||||
|
||||
static int hisi_zip_regs_show(struct seq_file *s, void *unused)
|
||||
{
|
||||
hisi_qm_regs_dump(s, s->private);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(hisi_zip_regs);
|
||||
|
||||
static int hisi_zip_core_debug_init(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
@ -560,9 +609,11 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
|
||||
regset->regs = hzip_dfx_regs;
|
||||
regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
|
||||
regset->base = qm->io_base + core_offsets[i];
|
||||
regset->dev = dev;
|
||||
|
||||
tmp_d = debugfs_create_dir(buf, qm->debug.debug_root);
|
||||
debugfs_create_regset32("regs", 0444, tmp_d, regset);
|
||||
debugfs_create_file("regs", 0444, tmp_d, regset,
|
||||
&hisi_zip_regs_fops);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -898,6 +949,8 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
goto err_qm_alg_unregister;
|
||||
}
|
||||
|
||||
hisi_qm_pm_init(qm);
|
||||
|
||||
return 0;
|
||||
|
||||
err_qm_alg_unregister:
|
||||
@ -920,6 +973,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct hisi_qm *qm = pci_get_drvdata(pdev);
|
||||
|
||||
hisi_qm_pm_uninit(qm);
|
||||
hisi_qm_wait_task_finish(qm, &zip_devices);
|
||||
hisi_qm_alg_unregister(qm, &zip_devices);
|
||||
|
||||
@ -932,6 +986,10 @@ static void hisi_zip_remove(struct pci_dev *pdev)
|
||||
hisi_zip_qm_uninit(qm);
|
||||
}
|
||||
|
||||
static const struct dev_pm_ops hisi_zip_pm_ops = {
|
||||
SET_RUNTIME_PM_OPS(hisi_qm_suspend, hisi_qm_resume, NULL)
|
||||
};
|
||||
|
||||
static const struct pci_error_handlers hisi_zip_err_handler = {
|
||||
.error_detected = hisi_qm_dev_err_detected,
|
||||
.slot_reset = hisi_qm_dev_slot_reset,
|
||||
@ -948,6 +1006,7 @@ static struct pci_driver hisi_zip_pci_driver = {
|
||||
hisi_qm_sriov_configure : NULL,
|
||||
.err_handler = &hisi_zip_err_handler,
|
||||
.shutdown = hisi_qm_dev_shutdown,
|
||||
.driver.pm = &hisi_zip_pm_ops,
|
||||
};
|
||||
|
||||
static void hisi_zip_register_debugfs(void)
|
||||
|
@ -170,15 +170,19 @@ static struct dcp *global_sdcp;
|
||||
|
||||
static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
|
||||
{
|
||||
int dma_err;
|
||||
struct dcp *sdcp = global_sdcp;
|
||||
const int chan = actx->chan;
|
||||
uint32_t stat;
|
||||
unsigned long ret;
|
||||
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
|
||||
|
||||
dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
|
||||
DMA_TO_DEVICE);
|
||||
|
||||
dma_err = dma_mapping_error(sdcp->dev, desc_phys);
|
||||
if (dma_err)
|
||||
return dma_err;
|
||||
|
||||
reinit_completion(&sdcp->completion[chan]);
|
||||
|
||||
/* Clear status register. */
|
||||
@ -216,18 +220,29 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
|
||||
static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
|
||||
struct skcipher_request *req, int init)
|
||||
{
|
||||
dma_addr_t key_phys, src_phys, dst_phys;
|
||||
struct dcp *sdcp = global_sdcp;
|
||||
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
|
||||
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
|
||||
int ret;
|
||||
|
||||
dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
|
||||
2 * AES_KEYSIZE_128,
|
||||
DMA_TO_DEVICE);
|
||||
dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
|
||||
DCP_BUF_SZ, DMA_TO_DEVICE);
|
||||
dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
|
||||
DCP_BUF_SZ, DMA_FROM_DEVICE);
|
||||
key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
|
||||
2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
|
||||
ret = dma_mapping_error(sdcp->dev, key_phys);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
|
||||
DCP_BUF_SZ, DMA_TO_DEVICE);
|
||||
ret = dma_mapping_error(sdcp->dev, src_phys);
|
||||
if (ret)
|
||||
goto err_src;
|
||||
|
||||
dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
|
||||
DCP_BUF_SZ, DMA_FROM_DEVICE);
|
||||
ret = dma_mapping_error(sdcp->dev, dst_phys);
|
||||
if (ret)
|
||||
goto err_dst;
|
||||
|
||||
if (actx->fill % AES_BLOCK_SIZE) {
|
||||
dev_err(sdcp->dev, "Invalid block size!\n");
|
||||
@ -265,10 +280,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
|
||||
ret = mxs_dcp_start_dma(actx);
|
||||
|
||||
aes_done_run:
|
||||
dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
|
||||
err_dst:
|
||||
dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
|
||||
err_src:
|
||||
dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
|
||||
DMA_TO_DEVICE);
|
||||
dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
|
||||
dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -283,21 +300,20 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
|
||||
|
||||
struct scatterlist *dst = req->dst;
|
||||
struct scatterlist *src = req->src;
|
||||
const int nents = sg_nents(req->src);
|
||||
int dst_nents = sg_nents(dst);
|
||||
|
||||
const int out_off = DCP_BUF_SZ;
|
||||
uint8_t *in_buf = sdcp->coh->aes_in_buf;
|
||||
uint8_t *out_buf = sdcp->coh->aes_out_buf;
|
||||
|
||||
uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
|
||||
uint32_t dst_off = 0;
|
||||
uint8_t *src_buf = NULL;
|
||||
uint32_t last_out_len = 0;
|
||||
|
||||
uint8_t *key = sdcp->coh->aes_key;
|
||||
|
||||
int ret = 0;
|
||||
int split = 0;
|
||||
unsigned int i, len, clen, rem = 0, tlen = 0;
|
||||
unsigned int i, len, clen, tlen = 0;
|
||||
int init = 0;
|
||||
bool limit_hit = false;
|
||||
|
||||
@ -315,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
|
||||
memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
|
||||
}
|
||||
|
||||
for_each_sg(req->src, src, nents, i) {
|
||||
for_each_sg(req->src, src, sg_nents(src), i) {
|
||||
src_buf = sg_virt(src);
|
||||
len = sg_dma_len(src);
|
||||
tlen += len;
|
||||
@ -340,34 +356,17 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
|
||||
* submit the buffer.
|
||||
*/
|
||||
if (actx->fill == out_off || sg_is_last(src) ||
|
||||
limit_hit) {
|
||||
limit_hit) {
|
||||
ret = mxs_dcp_run_aes(actx, req, init);
|
||||
if (ret)
|
||||
return ret;
|
||||
init = 0;
|
||||
|
||||
out_tmp = out_buf;
|
||||
sg_pcopy_from_buffer(dst, dst_nents, out_buf,
|
||||
actx->fill, dst_off);
|
||||
dst_off += actx->fill;
|
||||
last_out_len = actx->fill;
|
||||
while (dst && actx->fill) {
|
||||
if (!split) {
|
||||
dst_buf = sg_virt(dst);
|
||||
dst_off = 0;
|
||||
}
|
||||
rem = min(sg_dma_len(dst) - dst_off,
|
||||
actx->fill);
|
||||
|
||||
memcpy(dst_buf + dst_off, out_tmp, rem);
|
||||
out_tmp += rem;
|
||||
dst_off += rem;
|
||||
actx->fill -= rem;
|
||||
|
||||
if (dst_off == sg_dma_len(dst)) {
|
||||
dst = sg_next(dst);
|
||||
split = 0;
|
||||
} else {
|
||||
split = 1;
|
||||
}
|
||||
}
|
||||
actx->fill = 0;
|
||||
}
|
||||
} while (len);
|
||||
|
||||
@ -557,6 +556,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
|
||||
dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
|
||||
DCP_BUF_SZ, DMA_TO_DEVICE);
|
||||
|
||||
ret = dma_mapping_error(sdcp->dev, buf_phys);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Fill in the DMA descriptor. */
|
||||
desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
|
||||
MXS_DCP_CONTROL0_INTERRUPT |
|
||||
@ -589,6 +592,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req)
|
||||
if (rctx->fini) {
|
||||
digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf,
|
||||
DCP_SHA_PAY_SZ, DMA_FROM_DEVICE);
|
||||
ret = dma_mapping_error(sdcp->dev, digest_phys);
|
||||
if (ret)
|
||||
goto done_run;
|
||||
|
||||
desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
|
||||
desc->payload = digest_phys;
|
||||
}
|
||||
|
@ -1175,9 +1175,9 @@ static int omap_aes_probe(struct platform_device *pdev)
|
||||
spin_lock_init(&dd->lock);
|
||||
|
||||
INIT_LIST_HEAD(&dd->list);
|
||||
spin_lock(&list_lock);
|
||||
spin_lock_bh(&list_lock);
|
||||
list_add_tail(&dd->list, &dev_list);
|
||||
spin_unlock(&list_lock);
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
/* Initialize crypto engine */
|
||||
dd->engine = crypto_engine_alloc_init(dev, 1);
|
||||
@ -1264,9 +1264,9 @@ static int omap_aes_remove(struct platform_device *pdev)
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
spin_lock(&list_lock);
|
||||
spin_lock_bh(&list_lock);
|
||||
list_del(&dd->list);
|
||||
spin_unlock(&list_lock);
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
|
||||
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
|
||||
|
@ -210,7 +210,7 @@ void omap_crypto_cleanup(struct scatterlist *sg, struct scatterlist *orig,
|
||||
buf = sg_virt(sg);
|
||||
pages = get_order(len);
|
||||
|
||||
if (orig && (flags & OMAP_CRYPTO_COPY_MASK))
|
||||
if (orig && (flags & OMAP_CRYPTO_DATA_COPIED))
|
||||
omap_crypto_copy_data(sg, orig, offset, len);
|
||||
|
||||
if (flags & OMAP_CRYPTO_DATA_COPIED)
|
||||
|
@ -1033,9 +1033,9 @@ static int omap_des_probe(struct platform_device *pdev)
|
||||
|
||||
|
||||
INIT_LIST_HEAD(&dd->list);
|
||||
spin_lock(&list_lock);
|
||||
spin_lock_bh(&list_lock);
|
||||
list_add_tail(&dd->list, &dev_list);
|
||||
spin_unlock(&list_lock);
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
/* Initialize des crypto engine */
|
||||
dd->engine = crypto_engine_alloc_init(dev, 1);
|
||||
@ -1094,9 +1094,9 @@ static int omap_des_remove(struct platform_device *pdev)
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
|
||||
spin_lock(&list_lock);
|
||||
spin_lock_bh(&list_lock);
|
||||
list_del(&dd->list);
|
||||
spin_unlock(&list_lock);
|
||||
spin_unlock_bh(&list_lock);
|
||||
|
||||
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
|
||||
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
|
||||
|
@ -105,7 +105,6 @@
|
||||
#define FLAGS_FINAL 1
|
||||
#define FLAGS_DMA_ACTIVE 2
|
||||
#define FLAGS_OUTPUT_READY 3
|
||||
#define FLAGS_INIT 4
|
||||
#define FLAGS_CPU 5
|
||||
#define FLAGS_DMA_READY 6
|
||||
#define FLAGS_AUTO_XOR 7
|
||||
@ -368,24 +367,6 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
|
||||
hash[i] = le32_to_cpup((__le32 *)in + i);
|
||||
}
|
||||
|
||||
static int omap_sham_hw_init(struct omap_sham_dev *dd)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = pm_runtime_resume_and_get(dd->dev);
|
||||
if (err < 0) {
|
||||
dev_err(dd->dev, "failed to get sync: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!test_bit(FLAGS_INIT, &dd->flags)) {
|
||||
set_bit(FLAGS_INIT, &dd->flags);
|
||||
dd->err = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length,
|
||||
int final, int dma)
|
||||
{
|
||||
@ -1093,11 +1074,14 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq)
|
||||
dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d",
|
||||
ctx->op, ctx->total, ctx->digcnt, final);
|
||||
|
||||
dd->req = req;
|
||||
|
||||
err = omap_sham_hw_init(dd);
|
||||
if (err)
|
||||
err = pm_runtime_resume_and_get(dd->dev);
|
||||
if (err < 0) {
|
||||
dev_err(dd->dev, "failed to get sync: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
dd->err = 0;
|
||||
dd->req = req;
|
||||
|
||||
if (ctx->digcnt)
|
||||
dd->pdata->copy_hash(req, 0);
|
||||
@ -1736,7 +1720,7 @@ static void omap_sham_done_task(unsigned long data)
|
||||
if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
|
||||
goto finish;
|
||||
} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
|
||||
if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
|
||||
if (test_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
|
||||
omap_sham_update_dma_stop(dd);
|
||||
if (dd->err) {
|
||||
err = dd->err;
|
||||
@ -2129,7 +2113,6 @@ static int omap_sham_probe(struct platform_device *pdev)
|
||||
dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD;
|
||||
|
||||
pm_runtime_enable(dev);
|
||||
pm_runtime_irq_safe(dev);
|
||||
|
||||
err = pm_runtime_get_sync(dev);
|
||||
if (err < 0) {
|
||||
@ -2144,9 +2127,9 @@ static int omap_sham_probe(struct platform_device *pdev)
|
||||
(rev & dd->pdata->major_mask) >> dd->pdata->major_shift,
|
||||
(rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
|
||||
|
||||
spin_lock(&sham.lock);
|
||||
spin_lock_bh(&sham.lock);
|
||||
list_add_tail(&dd->list, &sham.dev_list);
|
||||
spin_unlock(&sham.lock);
|
||||
spin_unlock_bh(&sham.lock);
|
||||
|
||||
dd->engine = crypto_engine_alloc_init(dev, 1);
|
||||
if (!dd->engine) {
|
||||
@ -2194,10 +2177,11 @@ static int omap_sham_probe(struct platform_device *pdev)
|
||||
err_engine_start:
|
||||
crypto_engine_exit(dd->engine);
|
||||
err_engine:
|
||||
spin_lock(&sham.lock);
|
||||
spin_lock_bh(&sham.lock);
|
||||
list_del(&dd->list);
|
||||
spin_unlock(&sham.lock);
|
||||
spin_unlock_bh(&sham.lock);
|
||||
err_pm:
|
||||
pm_runtime_dont_use_autosuspend(dev);
|
||||
pm_runtime_disable(dev);
|
||||
if (!dd->polling_mode)
|
||||
dma_release_channel(dd->dma_lch);
|
||||
@ -2215,9 +2199,9 @@ static int omap_sham_remove(struct platform_device *pdev)
|
||||
dd = platform_get_drvdata(pdev);
|
||||
if (!dd)
|
||||
return -ENODEV;
|
||||
spin_lock(&sham.lock);
|
||||
spin_lock_bh(&sham.lock);
|
||||
list_del(&dd->list);
|
||||
spin_unlock(&sham.lock);
|
||||
spin_unlock_bh(&sham.lock);
|
||||
for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
|
||||
for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
|
||||
crypto_unregister_ahash(
|
||||
@ -2225,6 +2209,7 @@ static int omap_sham_remove(struct platform_device *pdev)
|
||||
dd->pdata->algs_info[i].registered--;
|
||||
}
|
||||
tasklet_kill(&dd->done_task);
|
||||
pm_runtime_dont_use_autosuspend(&pdev->dev);
|
||||
pm_runtime_disable(&pdev->dev);
|
||||
|
||||
if (!dd->polling_mode)
|
||||
@ -2235,32 +2220,11 @@ static int omap_sham_remove(struct platform_device *pdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
static int omap_sham_suspend(struct device *dev)
|
||||
{
|
||||
pm_runtime_put_sync(dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int omap_sham_resume(struct device *dev)
|
||||
{
|
||||
int err = pm_runtime_resume_and_get(dev);
|
||||
if (err < 0) {
|
||||
dev_err(dev, "failed to get sync: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume);
|
||||
|
||||
static struct platform_driver omap_sham_driver = {
|
||||
.probe = omap_sham_probe,
|
||||
.remove = omap_sham_remove,
|
||||
.driver = {
|
||||
.name = "omap-sham",
|
||||
.pm = &omap_sham_pm_ops,
|
||||
.of_match_table = omap_sham_of_match,
|
||||
},
|
||||
};
|
||||
|
@ -161,7 +161,7 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
|
||||
ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0);
|
||||
}
|
||||
|
||||
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
|
||||
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -210,21 +210,21 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
|
||||
hw_data->fw_mmp_name = ADF_4XXX_MMP;
|
||||
hw_data->init_admin_comms = adf_init_admin_comms;
|
||||
hw_data->exit_admin_comms = adf_exit_admin_comms;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->send_admin_init = adf_send_admin_init;
|
||||
hw_data->init_arb = adf_init_arb;
|
||||
hw_data->exit_arb = adf_exit_arb;
|
||||
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
|
||||
hw_data->enable_ints = adf_enable_ints;
|
||||
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
|
||||
hw_data->reset_device = adf_reset_flr;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK;
|
||||
hw_data->uof_get_num_objs = uof_get_num_objs;
|
||||
hw_data->uof_get_name = uof_get_name;
|
||||
hw_data->uof_get_ae_mask = uof_get_ae_mask;
|
||||
hw_data->set_msix_rttable = set_msix_default_rttable;
|
||||
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
|
||||
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
|
||||
adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
}
|
||||
|
@ -221,16 +221,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* Set DMA identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration.\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration.\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Get accelerator capabilities mask */
|
||||
|
@ -111,11 +111,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_C3XXX_PF2VF_OFFSET(i);
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_C3XXX_VINTMSK_OFFSET(i);
|
||||
}
|
||||
|
||||
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
|
||||
@ -159,8 +154,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
|
||||
ADF_C3XXX_SMIA1_MASK);
|
||||
}
|
||||
|
||||
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
|
||||
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -193,8 +190,6 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_sram_bar_id = get_sram_bar_id;
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_admin_info = adf_gen2_get_admin_info;
|
||||
hw_data->get_arb_info = adf_gen2_get_arb_info;
|
||||
hw_data->get_sku = get_sku;
|
||||
@ -203,16 +198,18 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
|
||||
hw_data->init_admin_comms = adf_init_admin_comms;
|
||||
hw_data->exit_admin_comms = adf_exit_admin_comms;
|
||||
hw_data->configure_iov_threads = configure_iov_threads;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->send_admin_init = adf_send_admin_init;
|
||||
hw_data->init_arb = adf_init_arb;
|
||||
hw_data->exit_arb = adf_exit_arb;
|
||||
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
|
||||
hw_data->enable_ints = adf_enable_ints;
|
||||
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
|
||||
hw_data->reset_device = adf_reset_flr;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,6 @@
|
||||
#define ADF_C3XXX_ERRSSMSH_EN BIT(3)
|
||||
|
||||
#define ADF_C3XXX_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
|
||||
#define ADF_C3XXX_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
|
||||
|
||||
/* AE to function mapping */
|
||||
#define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48
|
||||
|
@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_C3XXX_DEVICE_NAME)) {
|
||||
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
if (pci_save_state(pdev)) {
|
||||
dev_err(&pdev->dev, "Failed to save pci state\n");
|
||||
ret = -ENOMEM;
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
}
|
||||
|
||||
ret = qat_crypto_dev_config(accel_dev);
|
||||
if (ret)
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
|
||||
ret = adf_dev_init(accel_dev);
|
||||
if (ret)
|
||||
@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
adf_dev_stop(accel_dev);
|
||||
out_err_dev_shutdown:
|
||||
adf_dev_shutdown(accel_dev);
|
||||
out_err_disable_aer:
|
||||
adf_disable_aer(accel_dev);
|
||||
out_err_free_reg:
|
||||
pci_release_regions(accel_pci_dev->pci_dev);
|
||||
out_err_disable:
|
||||
|
@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_C3XXXIOV_PF2VF_OFFSET;
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_C3XXXIOV_VINTMSK_OFFSET;
|
||||
}
|
||||
|
||||
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
return 0;
|
||||
@ -81,10 +76,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->enable_error_correction = adf_vf_void_noop;
|
||||
hw_data->init_admin_comms = adf_vf_int_noop;
|
||||
hw_data->exit_admin_comms = adf_vf_void_noop;
|
||||
hw_data->send_admin_init = adf_vf2pf_init;
|
||||
hw_data->send_admin_init = adf_vf2pf_notify_init;
|
||||
hw_data->init_arb = adf_vf_int_noop;
|
||||
hw_data->exit_arb = adf_vf_void_noop;
|
||||
hw_data->disable_iov = adf_vf2pf_shutdown;
|
||||
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
|
||||
hw_data->get_accel_mask = get_accel_mask;
|
||||
hw_data->get_ae_mask = get_ae_mask;
|
||||
hw_data->get_num_accels = get_num_accels;
|
||||
@ -92,11 +87,10 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_sku = get_sku;
|
||||
hw_data->enable_ints = adf_vf_void_noop;
|
||||
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
hw_data->dev_class->instances++;
|
||||
adf_devmgr_update_class_index(hw_data);
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
|
@ -13,7 +13,6 @@
|
||||
#define ADF_C3XXXIOV_ETR_BAR 0
|
||||
#define ADF_C3XXXIOV_ETR_MAX_BANKS 1
|
||||
#define ADF_C3XXXIOV_PF2VF_OFFSET 0x200
|
||||
#define ADF_C3XXXIOV_VINTMSK_OFFSET 0x208
|
||||
|
||||
void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
|
||||
void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data);
|
||||
|
@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_C3XXXVF_DEVICE_NAME)) {
|
||||
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
|
||||
pr_err("QAT: Driver removal failed\n");
|
||||
return;
|
||||
}
|
||||
adf_flush_vf_wq(accel_dev);
|
||||
adf_dev_stop(accel_dev);
|
||||
adf_dev_shutdown(accel_dev);
|
||||
adf_cleanup_accel(accel_dev);
|
||||
|
@ -113,11 +113,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_C62X_PF2VF_OFFSET(i);
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_C62X_VINTMSK_OFFSET(i);
|
||||
}
|
||||
|
||||
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
|
||||
@ -161,8 +156,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
|
||||
ADF_C62X_SMIA1_MASK);
|
||||
}
|
||||
|
||||
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
|
||||
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -195,8 +192,6 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_sram_bar_id = get_sram_bar_id;
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_admin_info = adf_gen2_get_admin_info;
|
||||
hw_data->get_arb_info = adf_gen2_get_arb_info;
|
||||
hw_data->get_sku = get_sku;
|
||||
@ -205,16 +200,18 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
|
||||
hw_data->init_admin_comms = adf_init_admin_comms;
|
||||
hw_data->exit_admin_comms = adf_exit_admin_comms;
|
||||
hw_data->configure_iov_threads = configure_iov_threads;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->send_admin_init = adf_send_admin_init;
|
||||
hw_data->init_arb = adf_init_arb;
|
||||
hw_data->exit_arb = adf_exit_arb;
|
||||
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
|
||||
hw_data->enable_ints = adf_enable_ints;
|
||||
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
|
||||
hw_data->reset_device = adf_reset_flr;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,6 @@
|
||||
#define ADF_C62X_ERRSSMSH_EN BIT(3)
|
||||
|
||||
#define ADF_C62X_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
|
||||
#define ADF_C62X_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
|
||||
|
||||
/* AE to function mapping */
|
||||
#define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80
|
||||
|
@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_C62X_DEVICE_NAME)) {
|
||||
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
if (pci_save_state(pdev)) {
|
||||
dev_err(&pdev->dev, "Failed to save pci state\n");
|
||||
ret = -ENOMEM;
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
}
|
||||
|
||||
ret = qat_crypto_dev_config(accel_dev);
|
||||
if (ret)
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
|
||||
ret = adf_dev_init(accel_dev);
|
||||
if (ret)
|
||||
@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
adf_dev_stop(accel_dev);
|
||||
out_err_dev_shutdown:
|
||||
adf_dev_shutdown(accel_dev);
|
||||
out_err_disable_aer:
|
||||
adf_disable_aer(accel_dev);
|
||||
out_err_free_reg:
|
||||
pci_release_regions(accel_pci_dev->pci_dev);
|
||||
out_err_disable:
|
||||
|
@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_C62XIOV_PF2VF_OFFSET;
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_C62XIOV_VINTMSK_OFFSET;
|
||||
}
|
||||
|
||||
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
return 0;
|
||||
@ -81,10 +76,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->enable_error_correction = adf_vf_void_noop;
|
||||
hw_data->init_admin_comms = adf_vf_int_noop;
|
||||
hw_data->exit_admin_comms = adf_vf_void_noop;
|
||||
hw_data->send_admin_init = adf_vf2pf_init;
|
||||
hw_data->send_admin_init = adf_vf2pf_notify_init;
|
||||
hw_data->init_arb = adf_vf_int_noop;
|
||||
hw_data->exit_arb = adf_vf_void_noop;
|
||||
hw_data->disable_iov = adf_vf2pf_shutdown;
|
||||
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
|
||||
hw_data->get_accel_mask = get_accel_mask;
|
||||
hw_data->get_ae_mask = get_ae_mask;
|
||||
hw_data->get_num_accels = get_num_accels;
|
||||
@ -92,11 +87,10 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_sku = get_sku;
|
||||
hw_data->enable_ints = adf_vf_void_noop;
|
||||
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
hw_data->dev_class->instances++;
|
||||
adf_devmgr_update_class_index(hw_data);
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
|
@ -13,7 +13,6 @@
|
||||
#define ADF_C62XIOV_ETR_BAR 0
|
||||
#define ADF_C62XIOV_ETR_MAX_BANKS 1
|
||||
#define ADF_C62XIOV_PF2VF_OFFSET 0x200
|
||||
#define ADF_C62XIOV_VINTMSK_OFFSET 0x208
|
||||
|
||||
void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
|
||||
void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data);
|
||||
|
@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_C62XVF_DEVICE_NAME)) {
|
||||
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
|
||||
pr_err("QAT: Driver removal failed\n");
|
||||
return;
|
||||
}
|
||||
adf_flush_vf_wq(accel_dev);
|
||||
adf_dev_stop(accel_dev);
|
||||
adf_dev_shutdown(accel_dev);
|
||||
adf_cleanup_accel(accel_dev);
|
||||
|
@ -18,8 +18,6 @@
|
||||
#define ADF_4XXX_DEVICE_NAME "4xxx"
|
||||
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
|
||||
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
|
||||
#define ADF_ERRSOU3 (0x3A000 + 0x0C)
|
||||
#define ADF_ERRSOU5 (0x3A000 + 0xD8)
|
||||
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
|
||||
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
|
||||
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
|
||||
@ -156,7 +154,6 @@ struct adf_hw_device_data {
|
||||
u32 (*get_num_aes)(struct adf_hw_device_data *self);
|
||||
u32 (*get_num_accels)(struct adf_hw_device_data *self);
|
||||
u32 (*get_pf2vf_offset)(u32 i);
|
||||
u32 (*get_vintmsk_offset)(u32 i);
|
||||
void (*get_arb_info)(struct arb_info *arb_csrs_info);
|
||||
void (*get_admin_info)(struct admin_info *admin_csrs_info);
|
||||
enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
|
||||
@ -174,7 +171,7 @@ struct adf_hw_device_data {
|
||||
bool enable);
|
||||
void (*enable_ints)(struct adf_accel_dev *accel_dev);
|
||||
void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
|
||||
int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev);
|
||||
int (*enable_pfvf_comms)(struct adf_accel_dev *accel_dev);
|
||||
void (*reset_device)(struct adf_accel_dev *accel_dev);
|
||||
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
|
||||
char *(*uof_get_name)(u32 obj_num);
|
||||
@ -227,7 +224,6 @@ struct adf_fw_loader_data {
|
||||
|
||||
struct adf_accel_vf_info {
|
||||
struct adf_accel_dev *accel_dev;
|
||||
struct tasklet_struct vf2pf_bh_tasklet;
|
||||
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
|
||||
struct ratelimit_state vf2pf_ratelimit;
|
||||
u32 vf_nr;
|
||||
@ -249,6 +245,8 @@ struct adf_accel_dev {
|
||||
struct adf_accel_pci accel_pci_dev;
|
||||
union {
|
||||
struct {
|
||||
/* protects VF2PF interrupts access */
|
||||
spinlock_t vf2pf_ints_lock;
|
||||
/* vf_info is non-zero when SR-IOV is init'ed */
|
||||
struct adf_accel_vf_info *vf_info;
|
||||
} pf;
|
||||
|
@ -194,7 +194,7 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev)
|
||||
EXPORT_SYMBOL_GPL(adf_enable_aer);
|
||||
|
||||
/**
|
||||
* adf_disable_aer() - Enable Advance Error Reporting for acceleration device
|
||||
* adf_disable_aer() - Disable Advanced Error Reporting for acceleration device
|
||||
* @accel_dev: Pointer to acceleration device.
|
||||
*
|
||||
* Function disables PCI Advanced Error Reporting for the
|
||||
|
@ -193,22 +193,23 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
|
||||
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
|
||||
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask);
|
||||
void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask);
|
||||
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask);
|
||||
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
|
||||
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev);
|
||||
void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info);
|
||||
|
||||
int adf_vf2pf_init(struct adf_accel_dev *accel_dev);
|
||||
void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev);
|
||||
int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev);
|
||||
void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev);
|
||||
int adf_init_pf_wq(void);
|
||||
void adf_exit_pf_wq(void);
|
||||
int adf_init_vf_wq(void);
|
||||
void adf_exit_vf_wq(void);
|
||||
void adf_flush_vf_wq(struct adf_accel_dev *accel_dev);
|
||||
#else
|
||||
static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#define adf_sriov_configure NULL
|
||||
|
||||
static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
@ -222,12 +223,12 @@ static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
|
||||
static inline int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
|
||||
static inline void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
}
|
||||
|
||||
@ -249,5 +250,9 @@ static inline void adf_exit_vf_wq(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * adf_flush_vf_wq() - empty stub variant.
 * @accel_dev: unused.
 *
 * Sits in the #else branch of this header, opposite the real prototype
 * declared above; the body is empty, so calling it does nothing.
 * NOTE(review): the controlling #if condition is outside this view - confirm
 * which config option selects this stub.
 */
static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@ -61,6 +61,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
|
||||
struct service_hndl *service;
|
||||
struct list_head *list_itr;
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
int ret;
|
||||
|
||||
if (!hw_data) {
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
@ -88,8 +89,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
hw_data->enable_ints(accel_dev);
|
||||
|
||||
if (adf_ae_init(accel_dev)) {
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
"Failed to initialise Acceleration Engine\n");
|
||||
@ -110,6 +109,13 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
|
||||
}
|
||||
set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
|
||||
|
||||
hw_data->enable_ints(accel_dev);
|
||||
hw_data->enable_error_correction(accel_dev);
|
||||
|
||||
ret = hw_data->enable_pfvf_comms(accel_dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Subservice initialisation is divided into two stages: init and start.
|
||||
* This is to facilitate any ordering dependencies between services
|
||||
@ -126,9 +132,6 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
|
||||
set_bit(accel_dev->accel_id, service->init_status);
|
||||
}
|
||||
|
||||
hw_data->enable_error_correction(accel_dev);
|
||||
hw_data->enable_vf2pf_comms(accel_dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_dev_init);
|
||||
|
@ -15,6 +15,14 @@
|
||||
#include "adf_transport_access_macros.h"
|
||||
#include "adf_transport_internal.h"
|
||||
|
||||
#define ADF_MAX_NUM_VFS 32
|
||||
#define ADF_ERRSOU3 (0x3A000 + 0x0C)
|
||||
#define ADF_ERRSOU5 (0x3A000 + 0xD8)
|
||||
#define ADF_ERRMSK3 (0x3A000 + 0x1C)
|
||||
#define ADF_ERRMSK5 (0x3A000 + 0xDC)
|
||||
#define ADF_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
|
||||
#define ADF_ERR_REG_VF2PF_U(vf_src) (((vf_src) & 0x0000FFFF) << 16)
|
||||
|
||||
static int adf_enable_msix(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
|
||||
@ -71,14 +79,23 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
struct adf_bar *pmisc =
|
||||
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
|
||||
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
|
||||
u32 vf_mask;
|
||||
void __iomem *pmisc_addr = pmisc->virt_addr;
|
||||
u32 errsou3, errsou5, errmsk3, errmsk5;
|
||||
unsigned long vf_mask;
|
||||
|
||||
/* Get the interrupt sources triggered by VFs */
|
||||
vf_mask = ((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU5) &
|
||||
0x0000FFFF) << 16) |
|
||||
((ADF_CSR_RD(pmisc_bar_addr, ADF_ERRSOU3) &
|
||||
0x01FFFE00) >> 9);
|
||||
errsou3 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU3);
|
||||
errsou5 = ADF_CSR_RD(pmisc_addr, ADF_ERRSOU5);
|
||||
vf_mask = ADF_ERR_REG_VF2PF_L(errsou3);
|
||||
vf_mask |= ADF_ERR_REG_VF2PF_U(errsou5);
|
||||
|
||||
/* To avoid adding duplicate entries to work queue, clear
|
||||
* vf_mask bits that are already masked in the ERRMSK registers.
|
||||
*/
|
||||
errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK3);
|
||||
errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_ERRMSK5);
|
||||
vf_mask &= ~ADF_ERR_REG_VF2PF_L(errmsk3);
|
||||
vf_mask &= ~ADF_ERR_REG_VF2PF_U(errmsk5);
|
||||
|
||||
if (vf_mask) {
|
||||
struct adf_accel_vf_info *vf_info;
|
||||
@ -86,15 +103,13 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
|
||||
int i;
|
||||
|
||||
/* Disable VF2PF interrupts for VFs with pending ints */
|
||||
adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
|
||||
adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
|
||||
|
||||
/*
|
||||
* Schedule tasklets to handle VF2PF interrupt BHs
|
||||
* unless the VF is malicious and is attempting to
|
||||
* flood the host OS with VF2PF interrupts.
|
||||
* Handle VF2PF interrupt unless the VF is malicious and
|
||||
* is attempting to flood the host OS with VF2PF interrupts.
|
||||
*/
|
||||
for_each_set_bit(i, (const unsigned long *)&vf_mask,
|
||||
(sizeof(vf_mask) * BITS_PER_BYTE)) {
|
||||
for_each_set_bit(i, &vf_mask, ADF_MAX_NUM_VFS) {
|
||||
vf_info = accel_dev->pf.vf_info + i;
|
||||
|
||||
if (!__ratelimit(&vf_info->vf2pf_ratelimit)) {
|
||||
@ -104,8 +119,7 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Tasklet will re-enable ints from this VF */
|
||||
tasklet_hi_schedule(&vf_info->vf2pf_bh_tasklet);
|
||||
adf_schedule_vf2pf_handler(vf_info);
|
||||
irq_handled = true;
|
||||
}
|
||||
|
||||
|
@ -11,28 +11,8 @@
|
||||
#define ADF_DH895XCC_ERRMSK5 (ADF_DH895XCC_EP_OFFSET + 0xDC)
|
||||
#define ADF_DH895XCC_ERRMSK5_VF2PF_U_MASK(vf_mask) (vf_mask >> 16)
|
||||
|
||||
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
void __iomem *pmisc_bar_addr =
|
||||
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
|
||||
|
||||
ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x0);
|
||||
}
|
||||
|
||||
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
void __iomem *pmisc_bar_addr =
|
||||
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
|
||||
|
||||
ADF_CSR_WR(pmisc_bar_addr, hw_data->get_vintmsk_offset(0), 0x2);
|
||||
}
|
||||
|
||||
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask)
|
||||
static void __adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask)
|
||||
{
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
struct adf_bar *pmisc =
|
||||
@ -55,7 +35,17 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
}
|
||||
}
|
||||
|
||||
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
|
||||
/*
 * adf_enable_vf2pf_interrupts() - locked wrapper around
 * __adf_enable_vf2pf_interrupts().
 * @accel_dev: device whose pf.vf2pf_ints_lock is taken.
 * @vf_mask: passed through unchanged to the helper.
 *
 * Holds pf.vf2pf_ints_lock via spin_lock_irqsave() for the duration of the
 * helper call and restores the saved IRQ flags afterwards.
 */
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
|
||||
__adf_enable_vf2pf_interrupts(accel_dev, vf_mask);
|
||||
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
|
||||
}
|
||||
|
||||
static void __adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
|
||||
u32 vf_mask)
|
||||
{
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
struct adf_bar *pmisc =
|
||||
@ -78,6 +68,22 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * adf_disable_vf2pf_interrupts() - locked wrapper around
 * __adf_disable_vf2pf_interrupts().
 * @accel_dev: device whose pf.vf2pf_ints_lock is taken.
 * @vf_mask: passed through unchanged to the helper.
 *
 * Holds pf.vf2pf_ints_lock via spin_lock_irqsave() for the duration of the
 * helper call and restores the saved IRQ flags afterwards.
 */
void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
|
||||
__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
|
||||
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
|
||||
}
|
||||
|
||||
/*
 * adf_disable_vf2pf_interrupts_irq() - variant of
 * adf_disable_vf2pf_interrupts() that takes pf.vf2pf_ints_lock with plain
 * spin_lock()/spin_unlock(), i.e. without saving or restoring IRQ flags.
 * @accel_dev: device whose pf.vf2pf_ints_lock is taken.
 * @vf_mask: passed through unchanged to __adf_disable_vf2pf_interrupts().
 *
 * NOTE(review): presumably meant for callers already running in interrupt
 * context; the only caller visible in this diff is adf_msix_isr_ae() -
 * confirm there are no process-context callers.
 */
void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, u32 vf_mask)
|
||||
{
|
||||
spin_lock(&accel_dev->pf.vf2pf_ints_lock);
|
||||
__adf_disable_vf2pf_interrupts(accel_dev, vf_mask);
|
||||
spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
|
||||
}
|
||||
|
||||
static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
|
||||
{
|
||||
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
|
||||
@ -186,7 +192,6 @@ int adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr)
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_iov_putmsg);
|
||||
|
||||
void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
|
||||
{
|
||||
@ -216,7 +221,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
|
||||
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
|
||||
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
|
||||
ADF_PF2VF_MSGTYPE_SHIFT) |
|
||||
(ADF_PFVF_COMPATIBILITY_VERSION <<
|
||||
(ADF_PFVF_COMPAT_THIS_VERSION <<
|
||||
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
|
||||
|
||||
dev_dbg(&GET_DEV(accel_dev),
|
||||
@ -226,19 +231,19 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
|
||||
if (vf_compat_ver < hw_data->min_iov_compat_ver) {
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
"VF (vers %d) incompatible with PF (vers %d)\n",
|
||||
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
|
||||
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
|
||||
resp |= ADF_PF2VF_VF_INCOMPATIBLE <<
|
||||
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
|
||||
} else if (vf_compat_ver > ADF_PFVF_COMPATIBILITY_VERSION) {
|
||||
} else if (vf_compat_ver > ADF_PFVF_COMPAT_THIS_VERSION) {
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
"VF (vers %d) compat with PF (vers %d) unkn.\n",
|
||||
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
|
||||
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
|
||||
resp |= ADF_PF2VF_VF_COMPAT_UNKNOWN <<
|
||||
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
|
||||
} else {
|
||||
dev_dbg(&GET_DEV(accel_dev),
|
||||
"VF (vers %d) compatible with PF (vers %d)\n",
|
||||
vf_compat_ver, ADF_PFVF_COMPATIBILITY_VERSION);
|
||||
vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
|
||||
resp |= ADF_PF2VF_VF_COMPATIBLE <<
|
||||
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
|
||||
}
|
||||
@ -251,7 +256,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
|
||||
resp = (ADF_PF2VF_MSGORIGIN_SYSTEM |
|
||||
(ADF_PF2VF_MSGTYPE_VERSION_RESP <<
|
||||
ADF_PF2VF_MSGTYPE_SHIFT) |
|
||||
(ADF_PFVF_COMPATIBILITY_VERSION <<
|
||||
(ADF_PFVF_COMPAT_THIS_VERSION <<
|
||||
ADF_PF2VF_VERSION_RESP_VERS_SHIFT));
|
||||
resp |= ADF_PF2VF_VF_COMPATIBLE <<
|
||||
ADF_PF2VF_VERSION_RESP_RESULT_SHIFT;
|
||||
@ -284,6 +289,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info)
|
||||
|
||||
/* re-enable interrupt on PF from this VF */
|
||||
adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr));
|
||||
|
||||
return;
|
||||
err:
|
||||
dev_dbg(&GET_DEV(accel_dev), "Unknown message from VF%d (0x%x);\n",
|
||||
@ -313,8 +319,10 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
|
||||
|
||||
msg = ADF_VF2PF_MSGORIGIN_SYSTEM;
|
||||
msg |= ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ << ADF_VF2PF_MSGTYPE_SHIFT;
|
||||
msg |= ADF_PFVF_COMPATIBILITY_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
|
||||
BUILD_BUG_ON(ADF_PFVF_COMPATIBILITY_VERSION > 255);
|
||||
msg |= ADF_PFVF_COMPAT_THIS_VERSION << ADF_VF2PF_COMPAT_VER_REQ_SHIFT;
|
||||
BUILD_BUG_ON(ADF_PFVF_COMPAT_THIS_VERSION > 255);
|
||||
|
||||
reinit_completion(&accel_dev->vf.iov_msg_completion);
|
||||
|
||||
/* Send request from VF to PF */
|
||||
ret = adf_iov_putmsg(accel_dev, msg, 0);
|
||||
@ -338,14 +346,16 @@ static int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev)
|
||||
break;
|
||||
case ADF_PF2VF_VF_COMPAT_UNKNOWN:
|
||||
/* VF is newer than PF and decides whether it is compatible */
|
||||
if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver)
|
||||
if (accel_dev->vf.pf_version >= hw_data->min_iov_compat_ver) {
|
||||
accel_dev->vf.compatible = ADF_PF2VF_VF_COMPATIBLE;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
case ADF_PF2VF_VF_INCOMPATIBLE:
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
"PF (vers %d) and VF (vers %d) are not compatible\n",
|
||||
accel_dev->vf.pf_version,
|
||||
ADF_PFVF_COMPATIBILITY_VERSION);
|
||||
ADF_PFVF_COMPAT_THIS_VERSION);
|
||||
return -EINVAL;
|
||||
default:
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
|
@ -52,7 +52,7 @@
|
||||
* IN_USE_BY pattern as part of a collision control scheme (see adf_iov_putmsg).
|
||||
*/
|
||||
|
||||
#define ADF_PFVF_COMPATIBILITY_VERSION 0x1 /* PF<->VF compat */
|
||||
#define ADF_PFVF_COMPAT_THIS_VERSION 0x1 /* PF<->VF compat */
|
||||
|
||||
/* PF->VF messages */
|
||||
#define ADF_PF2VF_INT BIT(0)
|
||||
|
@ -24,9 +24,8 @@ static void adf_iov_send_resp(struct work_struct *work)
|
||||
kfree(pf2vf_resp);
|
||||
}
|
||||
|
||||
static void adf_vf2pf_bh_handler(void *data)
|
||||
void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
|
||||
{
|
||||
struct adf_accel_vf_info *vf_info = (struct adf_accel_vf_info *)data;
|
||||
struct adf_pf2vf_resp *pf2vf_resp;
|
||||
|
||||
pf2vf_resp = kzalloc(sizeof(*pf2vf_resp), GFP_ATOMIC);
|
||||
@ -52,9 +51,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
|
||||
vf_info->accel_dev = accel_dev;
|
||||
vf_info->vf_nr = i;
|
||||
|
||||
tasklet_init(&vf_info->vf2pf_bh_tasklet,
|
||||
(void *)adf_vf2pf_bh_handler,
|
||||
(unsigned long)vf_info);
|
||||
mutex_init(&vf_info->pf2vf_lock);
|
||||
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
|
||||
DEFAULT_RATELIMIT_INTERVAL,
|
||||
@ -110,8 +106,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
|
||||
hw_data->configure_iov_threads(accel_dev, false);
|
||||
|
||||
for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
|
||||
tasklet_disable(&vf->vf2pf_bh_tasklet);
|
||||
tasklet_kill(&vf->vf2pf_bh_tasklet);
|
||||
mutex_destroy(&vf->pf2vf_lock);
|
||||
}
|
||||
|
||||
|
@ -5,14 +5,14 @@
|
||||
#include "adf_pf2vf_msg.h"
|
||||
|
||||
/**
|
||||
* adf_vf2pf_init() - send init msg to PF
|
||||
* adf_vf2pf_notify_init() - send init msg to PF
|
||||
* @accel_dev: Pointer to acceleration VF device.
|
||||
*
|
||||
* Function sends an init message from the VF to a PF
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
|
||||
int adf_vf2pf_notify_init(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
|
||||
(ADF_VF2PF_MSGTYPE_INIT << ADF_VF2PF_MSGTYPE_SHIFT));
|
||||
@ -25,17 +25,17 @@ int adf_vf2pf_init(struct adf_accel_dev *accel_dev)
|
||||
set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_vf2pf_init);
|
||||
EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init);
|
||||
|
||||
/**
|
||||
* adf_vf2pf_shutdown() - send shutdown msg to PF
|
||||
* adf_vf2pf_notify_shutdown() - send shutdown msg to PF
|
||||
* @accel_dev: Pointer to acceleration VF device.
|
||||
*
|
||||
* Function sends a shutdown message from the VF to a PF
|
||||
*
|
||||
* Return: void
|
||||
*/
|
||||
void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
|
||||
void adf_vf2pf_notify_shutdown(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
u32 msg = (ADF_VF2PF_MSGORIGIN_SYSTEM |
|
||||
(ADF_VF2PF_MSGTYPE_SHUTDOWN << ADF_VF2PF_MSGTYPE_SHIFT));
|
||||
@ -45,4 +45,4 @@ void adf_vf2pf_shutdown(struct adf_accel_dev *accel_dev)
|
||||
dev_err(&GET_DEV(accel_dev),
|
||||
"Failed to send Shutdown event to PF\n");
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_vf2pf_shutdown);
|
||||
EXPORT_SYMBOL_GPL(adf_vf2pf_notify_shutdown);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "adf_pf2vf_msg.h"
|
||||
|
||||
#define ADF_VINTSOU_OFFSET 0x204
|
||||
#define ADF_VINTMSK_OFFSET 0x208
|
||||
#define ADF_VINTSOU_BUN BIT(0)
|
||||
#define ADF_VINTSOU_PF2VF BIT(1)
|
||||
|
||||
@ -28,6 +29,27 @@ struct adf_vf_stop_data {
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
/*
 * adf_enable_pf2vf_interrupts() - write 0x0 to the VF interrupt mask CSR.
 * @accel_dev: device whose misc BAR is written.
 *
 * Looks up the misc BAR through hw_data->get_misc_bar_id() and writes 0x0 at
 * ADF_VINTMSK_OFFSET. adf_isr() treats set bits in that CSR as masked
 * sources, so a zero value leaves no source masked.
 */
void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
void __iomem *pmisc_bar_addr =
|
||||
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
|
||||
|
||||
ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x0);
|
||||
}
|
||||
|
||||
/*
 * adf_disable_pf2vf_interrupts() - write 0x2 to the VF interrupt mask CSR.
 * @accel_dev: device whose misc BAR is written.
 *
 * Looks up the misc BAR through hw_data->get_misc_bar_id() and writes 0x2 at
 * ADF_VINTMSK_OFFSET.
 * NOTE(review): 0x2 has the same value as ADF_VINTSOU_PF2VF (BIT(1)), so this
 * presumably masks only the PF2VF source - confirm against the register spec.
 */
void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
|
||||
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
|
||||
void __iomem *pmisc_bar_addr =
|
||||
pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)].virt_addr;
|
||||
|
||||
ADF_CSR_WR(pmisc_bar_addr, ADF_VINTMSK_OFFSET, 0x2);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_disable_pf2vf_interrupts);
|
||||
|
||||
static int adf_enable_msi(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
|
||||
@ -160,11 +182,21 @@ static irqreturn_t adf_isr(int irq, void *privdata)
|
||||
struct adf_bar *pmisc =
|
||||
&GET_BARS(accel_dev)[hw_data->get_misc_bar_id(hw_data)];
|
||||
void __iomem *pmisc_bar_addr = pmisc->virt_addr;
|
||||
u32 v_int;
|
||||
bool handled = false;
|
||||
u32 v_int, v_mask;
|
||||
|
||||
/* Read VF INT source CSR to determine the source of VF interrupt */
|
||||
v_int = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTSOU_OFFSET);
|
||||
|
||||
/* Read VF INT mask CSR to determine which sources are masked */
|
||||
v_mask = ADF_CSR_RD(pmisc_bar_addr, ADF_VINTMSK_OFFSET);
|
||||
|
||||
/*
|
||||
* Recompute v_int ignoring sources that are masked. This is to
|
||||
* avoid rescheduling the tasklet for interrupts already handled
|
||||
*/
|
||||
v_int &= ~v_mask;
|
||||
|
||||
/* Check for PF2VF interrupt */
|
||||
if (v_int & ADF_VINTSOU_PF2VF) {
|
||||
/* Disable PF to VF interrupt */
|
||||
@ -172,7 +204,7 @@ static irqreturn_t adf_isr(int irq, void *privdata)
|
||||
|
||||
/* Schedule tasklet to handle interrupt BH */
|
||||
tasklet_hi_schedule(&accel_dev->vf.pf2vf_bh_tasklet);
|
||||
return IRQ_HANDLED;
|
||||
handled = true;
|
||||
}
|
||||
|
||||
/* Check bundle interrupt */
|
||||
@ -184,10 +216,10 @@ static irqreturn_t adf_isr(int irq, void *privdata)
|
||||
csr_ops->write_csr_int_flag_and_col(bank->csr_addr,
|
||||
bank->bank_number, 0);
|
||||
tasklet_hi_schedule(&bank->resp_handler);
|
||||
return IRQ_HANDLED;
|
||||
handled = true;
|
||||
}
|
||||
|
||||
return IRQ_NONE;
|
||||
return handled ? IRQ_HANDLED : IRQ_NONE;
|
||||
}
|
||||
|
||||
static int adf_request_msi_irq(struct adf_accel_dev *accel_dev)
|
||||
@ -285,6 +317,30 @@ int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc);
|
||||
|
||||
/**
|
||||
* adf_flush_vf_wq() - Flush workqueue for VF
|
||||
* @accel_dev: Pointer to acceleration device.
|
||||
*
|
||||
* Function disables the PF/VF interrupts on the VF so that no new messages
|
||||
* are received and flushes the workqueue 'adf_vf_stop_wq'.
|
||||
*
|
||||
* Return: void.
|
||||
*/
|
||||
void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
adf_disable_pf2vf_interrupts(accel_dev);
|
||||
|
||||
flush_workqueue(adf_vf_stop_wq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
|
||||
|
||||
/**
|
||||
* adf_init_vf_wq() - Init workqueue for VF
|
||||
*
|
||||
* Function initializes the workqueue 'adf_vf_stop_wq' for VF.
|
||||
*
|
||||
* Return: 0 on success, error code otherwise.
|
||||
*/
|
||||
int __init adf_init_vf_wq(void)
|
||||
{
|
||||
adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0);
|
||||
|
@ -131,11 +131,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_DH895XCC_PF2VF_OFFSET(i);
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_DH895XCC_VINTMSK_OFFSET(i);
|
||||
}
|
||||
|
||||
static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
struct adf_hw_device_data *hw_device = accel_dev->hw_device;
|
||||
@ -180,8 +175,10 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev)
|
||||
ADF_DH895XCC_SMIA1_MASK);
|
||||
}
|
||||
|
||||
static int adf_pf_enable_vf2pf_comms(struct adf_accel_dev *accel_dev)
|
||||
static int adf_enable_pf2vf_comms(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
spin_lock_init(&accel_dev->pf.vf2pf_ints_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -213,8 +210,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_num_aes = get_num_aes;
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_admin_info = adf_gen2_get_admin_info;
|
||||
hw_data->get_arb_info = adf_gen2_get_arb_info;
|
||||
hw_data->get_sram_bar_id = get_sram_bar_id;
|
||||
@ -224,15 +219,17 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
|
||||
hw_data->init_admin_comms = adf_init_admin_comms;
|
||||
hw_data->exit_admin_comms = adf_exit_admin_comms;
|
||||
hw_data->configure_iov_threads = configure_iov_threads;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->send_admin_init = adf_send_admin_init;
|
||||
hw_data->init_arb = adf_init_arb;
|
||||
hw_data->exit_arb = adf_exit_arb;
|
||||
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
|
||||
hw_data->enable_ints = adf_enable_ints;
|
||||
hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms;
|
||||
hw_data->reset_device = adf_reset_sbr;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->enable_pfvf_comms = adf_enable_pf2vf_comms;
|
||||
hw_data->disable_iov = adf_disable_sriov;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,6 @@
|
||||
#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
|
||||
|
||||
#define ADF_DH895XCC_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
|
||||
#define ADF_DH895XCC_VINTMSK_OFFSET(i) (0x3A000 + 0x200 + ((i) * 0x04))
|
||||
|
||||
/* AE to function mapping */
|
||||
#define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96
|
||||
|
@ -159,17 +159,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_DH895XCC_DEVICE_NAME)) {
|
||||
@ -208,12 +201,12 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
if (pci_save_state(pdev)) {
|
||||
dev_err(&pdev->dev, "Failed to save pci state\n");
|
||||
ret = -ENOMEM;
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
}
|
||||
|
||||
ret = qat_crypto_dev_config(accel_dev);
|
||||
if (ret)
|
||||
goto out_err_free_reg;
|
||||
goto out_err_disable_aer;
|
||||
|
||||
ret = adf_dev_init(accel_dev);
|
||||
if (ret)
|
||||
@ -229,6 +222,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
adf_dev_stop(accel_dev);
|
||||
out_err_dev_shutdown:
|
||||
adf_dev_shutdown(accel_dev);
|
||||
out_err_disable_aer:
|
||||
adf_disable_aer(accel_dev);
|
||||
out_err_free_reg:
|
||||
pci_release_regions(accel_pci_dev->pci_dev);
|
||||
out_err_disable:
|
||||
|
@ -52,11 +52,6 @@ static u32 get_pf2vf_offset(u32 i)
|
||||
return ADF_DH895XCCIOV_PF2VF_OFFSET;
|
||||
}
|
||||
|
||||
static u32 get_vintmsk_offset(u32 i)
|
||||
{
|
||||
return ADF_DH895XCCIOV_VINTMSK_OFFSET;
|
||||
}
|
||||
|
||||
static int adf_vf_int_noop(struct adf_accel_dev *accel_dev)
|
||||
{
|
||||
return 0;
|
||||
@ -81,10 +76,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->enable_error_correction = adf_vf_void_noop;
|
||||
hw_data->init_admin_comms = adf_vf_int_noop;
|
||||
hw_data->exit_admin_comms = adf_vf_void_noop;
|
||||
hw_data->send_admin_init = adf_vf2pf_init;
|
||||
hw_data->send_admin_init = adf_vf2pf_notify_init;
|
||||
hw_data->init_arb = adf_vf_int_noop;
|
||||
hw_data->exit_arb = adf_vf_void_noop;
|
||||
hw_data->disable_iov = adf_vf2pf_shutdown;
|
||||
hw_data->disable_iov = adf_vf2pf_notify_shutdown;
|
||||
hw_data->get_accel_mask = get_accel_mask;
|
||||
hw_data->get_ae_mask = get_ae_mask;
|
||||
hw_data->get_num_accels = get_num_accels;
|
||||
@ -92,11 +87,10 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
|
||||
hw_data->get_etr_bar_id = get_etr_bar_id;
|
||||
hw_data->get_misc_bar_id = get_misc_bar_id;
|
||||
hw_data->get_pf2vf_offset = get_pf2vf_offset;
|
||||
hw_data->get_vintmsk_offset = get_vintmsk_offset;
|
||||
hw_data->get_sku = get_sku;
|
||||
hw_data->enable_ints = adf_vf_void_noop;
|
||||
hw_data->enable_vf2pf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION;
|
||||
hw_data->enable_pfvf_comms = adf_enable_vf2pf_comms;
|
||||
hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION;
|
||||
hw_data->dev_class->instances++;
|
||||
adf_devmgr_update_class_index(hw_data);
|
||||
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
|
||||
|
@ -13,7 +13,6 @@
|
||||
#define ADF_DH895XCCIOV_ETR_BAR 0
|
||||
#define ADF_DH895XCCIOV_ETR_MAX_BANKS 1
|
||||
#define ADF_DH895XCCIOV_PF2VF_OFFSET 0x200
|
||||
#define ADF_DH895XCCIOV_VINTMSK_OFFSET 0x208
|
||||
|
||||
void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
|
||||
void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data);
|
||||
|
@ -141,17 +141,10 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
}
|
||||
|
||||
/* set dma identifier */
|
||||
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
ret = -EFAULT;
|
||||
goto out_err_disable;
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
}
|
||||
|
||||
} else {
|
||||
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev, "No usable DMA configuration\n");
|
||||
goto out_err_disable;
|
||||
}
|
||||
|
||||
if (pci_request_regions(pdev, ADF_DH895XCCVF_DEVICE_NAME)) {
|
||||
@ -218,6 +211,7 @@ static void adf_remove(struct pci_dev *pdev)
|
||||
pr_err("QAT: Driver removal failed\n");
|
||||
return;
|
||||
}
|
||||
adf_flush_vf_wq(accel_dev);
|
||||
adf_dev_stop(accel_dev);
|
||||
adf_dev_shutdown(accel_dev);
|
||||
adf_cleanup_accel(accel_dev);
|
||||
|
@ -187,9 +187,9 @@ static int virtcrypto_init_vqs(struct virtio_crypto *vi)
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
virtcrypto_set_affinity(vi);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <asm/archrandom.h>
|
||||
|
||||
static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
|
||||
@ -42,3 +43,19 @@ u32 arm_smccc_get_version(void)
|
||||
return smccc_version;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arm_smccc_get_version);
|
||||
|
||||
/*
 * Register the "smccc_trng" platform device when smccc_trng_available is set.
 * A registration failure is only logged; the initcall always returns 0.
 */
static int __init smccc_devices_init(void)
{
	struct platform_device *trng_dev;

	if (!smccc_trng_available)
		return 0;

	trng_dev = platform_device_register_simple("smccc_trng", -1, NULL, 0);
	if (IS_ERR(trng_dev))
		pr_err("smccc_trng: could not register device: %ld\n",
		       PTR_ERR(trng_dev));

	return 0;
}
device_initcall(smccc_devices_init);
|
||||
|
@ -3,6 +3,7 @@
|
||||
/*
|
||||
* Common values for the SM4 algorithm
|
||||
* Copyright (C) 2018 ARM Limited or its affiliates.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#ifndef _CRYPTO_SM4_H
|
||||
@ -15,17 +16,29 @@
|
||||
#define SM4_BLOCK_SIZE 16
|
||||
#define SM4_RKEY_WORDS 32
|
||||
|
||||
struct crypto_sm4_ctx {
|
||||
struct sm4_ctx {
|
||||
u32 rkey_enc[SM4_RKEY_WORDS];
|
||||
u32 rkey_dec[SM4_RKEY_WORDS];
|
||||
};
|
||||
|
||||
int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len);
|
||||
int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
|
||||
/**
|
||||
* sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
|
||||
* @ctx: The location where the computed key will be stored.
|
||||
* @in_key: The supplied key.
|
||||
* @key_len: The length of the supplied key.
|
||||
*
|
||||
* Returns 0 on success, or -EINVAL if @key_len is not SM4_KEY_SIZE. The
|
||||
* pointers are not validated.
|
||||
*/
|
||||
int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len);
|
||||
|
||||
void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
|
||||
void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
|
||||
/**
|
||||
* sm4_crypt_block - Encrypt or decrypt a single SM4 block
|
||||
* @rk: The rkey_enc for encrypt or rkey_dec for decrypt
|
||||
* @out: Buffer to store output data
|
||||
* @in: Buffer containing the input data
|
||||
*/
|
||||
void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in);
|
||||
|
||||
#endif
|
||||
|
@ -12,6 +12,7 @@
|
||||
#ifndef PADATA_H
|
||||
#define PADATA_H
|
||||
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/compiler_types.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/spinlock.h>
|
||||
@ -96,7 +97,7 @@ struct parallel_data {
|
||||
struct padata_shell *ps;
|
||||
struct padata_list __percpu *reorder_list;
|
||||
struct padata_serial_queue __percpu *squeue;
|
||||
atomic_t refcnt;
|
||||
refcount_t refcnt;
|
||||
unsigned int seq_nr;
|
||||
unsigned int processed;
|
||||
int cpu;
|
||||
|
@ -9,19 +9,6 @@
|
||||
*
|
||||
* Copyright (c) 2020 Oracle and/or its affiliates.
|
||||
* Author: Daniel Jordan <daniel.m.jordan@oracle.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <linux/completion.h>
|
||||
@ -211,7 +198,7 @@ int padata_do_parallel(struct padata_shell *ps,
|
||||
if ((pinst->flags & PADATA_RESET))
|
||||
goto out;
|
||||
|
||||
atomic_inc(&pd->refcnt);
|
||||
refcount_inc(&pd->refcnt);
|
||||
padata->pd = pd;
|
||||
padata->cb_cpu = *cb_cpu;
|
||||
|
||||
@ -383,7 +370,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
|
||||
}
|
||||
local_bh_enable();
|
||||
|
||||
if (atomic_sub_and_test(cnt, &pd->refcnt))
|
||||
if (refcount_sub_and_test(cnt, &pd->refcnt))
|
||||
padata_free_pd(pd);
|
||||
}
|
||||
|
||||
@ -593,7 +580,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
|
||||
padata_init_reorder_list(pd);
|
||||
padata_init_squeues(pd);
|
||||
pd->seq_nr = -1;
|
||||
atomic_set(&pd->refcnt, 1);
|
||||
refcount_set(&pd->refcnt, 1);
|
||||
spin_lock_init(&pd->lock);
|
||||
pd->cpu = cpumask_first(pd->cpumask.pcpu);
|
||||
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
|
||||
@ -667,7 +654,7 @@ static int padata_replace(struct padata_instance *pinst)
|
||||
synchronize_rcu();
|
||||
|
||||
list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
|
||||
if (atomic_dec_and_test(&ps->opd->refcnt))
|
||||
if (refcount_dec_and_test(&ps->opd->refcnt))
|
||||
padata_free_pd(ps->opd);
|
||||
|
||||
pinst->flags &= ~PADATA_RESET;
|
||||
@ -733,7 +720,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
|
||||
struct cpumask *serial_mask, *parallel_mask;
|
||||
int err = -EINVAL;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
mutex_lock(&pinst->lock);
|
||||
|
||||
switch (cpumask_type) {
|
||||
@ -753,7 +740,7 @@ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
|
||||
|
||||
out:
|
||||
mutex_unlock(&pinst->lock);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -992,7 +979,7 @@ struct padata_instance *padata_alloc(const char *name)
|
||||
if (!pinst->parallel_wq)
|
||||
goto err_free_inst;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
|
||||
pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
|
||||
WQ_CPU_INTENSIVE, 1, name);
|
||||
@ -1026,7 +1013,7 @@ struct padata_instance *padata_alloc(const char *name)
|
||||
&pinst->cpu_dead_node);
|
||||
#endif
|
||||
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
|
||||
return pinst;
|
||||
|
||||
@ -1036,7 +1023,7 @@ struct padata_instance *padata_alloc(const char *name)
|
||||
err_free_serial_wq:
|
||||
destroy_workqueue(pinst->serial_wq);
|
||||
err_put_cpus:
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
destroy_workqueue(pinst->parallel_wq);
|
||||
err_free_inst:
|
||||
kfree(pinst);
|
||||
@ -1074,9 +1061,9 @@ struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
|
||||
|
||||
ps->pinst = pinst;
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
pd = padata_alloc_pd(ps);
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
|
||||
if (!pd)
|
||||
goto out_free_ps;
|
||||
|
@ -128,3 +128,6 @@ config CRYPTO_LIB_CHACHA20POLY1305
|
||||
|
||||
config CRYPTO_LIB_SHA256
|
||||
tristate
|
||||
|
||||
config CRYPTO_LIB_SM4
|
||||
tristate
|
||||
|
@ -38,6 +38,9 @@ libpoly1305-y += poly1305.o
|
||||
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
|
||||
libsha256-y := sha256.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
|
||||
libsm4-y := sm4.o
|
||||
|
||||
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
|
||||
libblake2s-y += blake2s-selftest.o
|
||||
libchacha20poly1305-y += chacha20poly1305-selftest.o
|
||||
|
@ -73,7 +73,7 @@ void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
|
||||
}
|
||||
EXPORT_SYMBOL(blake2s256_hmac);
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init blake2s_mod_init(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
|
||||
WARN_ON(!blake2s_selftest()))
|
||||
@ -81,12 +81,12 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit blake2s_mod_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(blake2s_mod_init);
|
||||
module_exit(blake2s_mod_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("BLAKE2s hash function");
|
||||
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
||||
|
@ -354,7 +354,7 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
|
||||
}
|
||||
EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init chacha20poly1305_init(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
|
||||
WARN_ON(!chacha20poly1305_selftest()))
|
||||
@ -362,12 +362,12 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit chacha20poly1305_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(chacha20poly1305_init);
|
||||
module_exit(chacha20poly1305_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
|
||||
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
static int __init mod_init(void)
|
||||
static int __init curve25519_init(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
|
||||
WARN_ON(!curve25519_selftest()))
|
||||
@ -21,12 +21,12 @@ static int __init mod_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mod_exit(void)
|
||||
static void __exit curve25519_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
module_init(curve25519_init);
|
||||
module_exit(curve25519_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
|
||||
|
176
lib/crypto/sm4.c
Normal file
176
lib/crypto/sm4.c
Normal file
@ -0,0 +1,176 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* SM4, as specified in
|
||||
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
|
||||
*
|
||||
* Copyright (C) 2018 ARM Limited or its affiliates.
|
||||
* Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/sm4.h>
|
||||
|
||||
/* Constants XORed into the four input key words before key expansion. */
static const u32 fk[4] = {
	0xa3b1bac6,
	0x56aa3350,
	0x677d9197,
	0xb27022dc
};
|
||||
|
||||
/* Per-round constants; ck[i] is mixed in when deriving round key i. */
static const u32 __cacheline_aligned ck[32] = {
	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,	/*  0 -  3 */
	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,	/*  4 -  7 */
	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,	/*  8 - 11 */
	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,	/* 12 - 15 */
	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,	/* 16 - 19 */
	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,	/* 20 - 23 */
	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,	/* 24 - 27 */
	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279	/* 28 - 31 */
};
|
||||
|
||||
/* Byte substitution table, indexed by the input byte (see sm4_t_non_lin_sub). */
static const u8 __cacheline_aligned sbox[256] = {
	/* 0x00 */ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
	/* 0x08 */ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
	/* 0x10 */ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
	/* 0x18 */ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
	/* 0x20 */ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
	/* 0x28 */ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
	/* 0x30 */ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
	/* 0x38 */ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
	/* 0x40 */ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
	/* 0x48 */ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
	/* 0x50 */ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
	/* 0x58 */ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
	/* 0x60 */ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
	/* 0x68 */ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
	/* 0x70 */ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
	/* 0x78 */ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
	/* 0x80 */ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
	/* 0x88 */ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
	/* 0x90 */ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
	/* 0x98 */ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
	/* 0xa0 */ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
	/* 0xa8 */ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
	/* 0xb0 */ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
	/* 0xb8 */ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
	/* 0xc0 */ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
	/* 0xc8 */ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
	/* 0xd0 */ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
	/* 0xd8 */ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
	/* 0xe0 */ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
	/* 0xe8 */ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
	/* 0xf0 */ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
	/* 0xf8 */ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
|
||||
|
||||
/* Substitute each of the four bytes of @x through sbox, keeping byte order. */
static inline u32 sm4_t_non_lin_sub(u32 x)
{
	return ((u32)sbox[x & 0xff]) |
	       ((u32)sbox[(x >> 8) & 0xff] << 8) |
	       ((u32)sbox[(x >> 16) & 0xff] << 16) |
	       ((u32)sbox[(x >> 24) & 0xff] << 24);
}
|
||||
|
||||
/* Linear step of the key schedule: x ^ (x <<< 13) ^ (x <<< 23). */
static inline u32 sm4_key_lin_sub(u32 x)
{
	u32 r13 = rol32(x, 13);
	u32 r23 = rol32(x, 23);

	return x ^ r13 ^ r23;
}
|
||||
|
||||
/*
 * Linear step of the round function:
 * x ^ (x <<< 2) ^ (x <<< 10) ^ (x <<< 18) ^ (x <<< 24).
 */
static inline u32 sm4_enc_lin_sub(u32 x)
{
	u32 acc = x;

	acc ^= rol32(x, 2);
	acc ^= rol32(x, 10);
	acc ^= rol32(x, 18);
	acc ^= rol32(x, 24);

	return acc;
}
|
||||
|
||||
/* Key-schedule transform: byte substitution followed by the key linear step. */
static inline u32 sm4_key_sub(u32 x)
{
	u32 substituted = sm4_t_non_lin_sub(x);

	return sm4_key_lin_sub(substituted);
}
|
||||
|
||||
/* Round transform: byte substitution followed by the round linear step. */
static inline u32 sm4_enc_sub(u32 x)
{
	u32 substituted = sm4_t_non_lin_sub(x);

	return sm4_enc_lin_sub(substituted);
}
|
||||
|
||||
/* One cipher round: fold x1, x2, x3 and the round key, transform, XOR into x0. */
static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
{
	u32 mixed = x1 ^ x2 ^ x3 ^ rk;

	return x0 ^ sm4_enc_sub(mixed);
}
|
||||
|
||||
|
||||
/**
|
||||
* sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
|
||||
* @ctx: The location where the computed key will be stored.
|
||||
* @in_key: The supplied key.
|
||||
* @key_len: The length of the supplied key.
|
||||
*
|
||||
* Returns 0 on success. The function fails only if an invalid key size (or
|
||||
* pointer) is supplied.
|
||||
*/
|
||||
int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
u32 rk[4];
|
||||
const u32 *key = (u32 *)in_key;
|
||||
int i;
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
|
||||
rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
|
||||
rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
|
||||
rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
|
||||
|
||||
for (i = 0; i < 32; i += 4) {
|
||||
rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
|
||||
rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
|
||||
rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
|
||||
rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
|
||||
|
||||
ctx->rkey_enc[i + 0] = rk[0];
|
||||
ctx->rkey_enc[i + 1] = rk[1];
|
||||
ctx->rkey_enc[i + 2] = rk[2];
|
||||
ctx->rkey_enc[i + 3] = rk[3];
|
||||
ctx->rkey_dec[31 - 0 - i] = rk[0];
|
||||
ctx->rkey_dec[31 - 1 - i] = rk[1];
|
||||
ctx->rkey_dec[31 - 2 - i] = rk[2];
|
||||
ctx->rkey_dec[31 - 3 - i] = rk[3];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_expandkey);
|
||||
|
||||
/**
|
||||
* sm4_crypt_block - Encrypt or decrypt a single SM4 block
|
||||
* @rk: The rkey_enc for encrypt or rkey_dec for decrypt
|
||||
* @out: Buffer to store output data
|
||||
* @in: Buffer containing the input data
|
||||
*/
|
||||
void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
|
||||
{
|
||||
u32 x[4], i;
|
||||
|
||||
x[0] = get_unaligned_be32(in + 0 * 4);
|
||||
x[1] = get_unaligned_be32(in + 1 * 4);
|
||||
x[2] = get_unaligned_be32(in + 2 * 4);
|
||||
x[3] = get_unaligned_be32(in + 3 * 4);
|
||||
|
||||
for (i = 0; i < 32; i += 4) {
|
||||
x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
|
||||
x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
|
||||
x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
|
||||
x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
|
||||
}
|
||||
|
||||
put_unaligned_be32(x[3 - 0], out + 0 * 4);
|
||||
put_unaligned_be32(x[3 - 1], out + 1 * 4);
|
||||
put_unaligned_be32(x[3 - 2], out + 2 * 4);
|
||||
put_unaligned_be32(x[3 - 3], out + 3 * 4);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sm4_crypt_block);
|
||||
|
||||
MODULE_DESCRIPTION("Generic SM4 library");
|
||||
MODULE_LICENSE("GPL v2");
|
@ -148,7 +148,7 @@ int mpi_resize(MPI a, unsigned nlimbs)
|
||||
return 0; /* no need to do it */
|
||||
|
||||
if (a->d) {
|
||||
p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
|
||||
p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
|
||||
|
Loading…
Reference in New Issue
Block a user