linux-stable/arch/arm64/crypto/sm4-ce-glue.c
Tianjia Zhang 5b33e0ec88 crypto: arm64/sm4 - add ARMv8 Crypto Extensions implementation
This adds ARMv8 implementations of SM4 in ECB, CBC, CFB and CTR
modes using Crypto Extensions, also includes key expansion operations
because the Crypto Extensions instruction is much faster than software
implementations.

The Crypto Extensions for SM4 can only run on ARMv8 implementations
that have support for these optional extensions.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218
mode of tcrypt. The abscissas are blocks of different lengths. The
data is tabulated and the unit is Mb/s:

sm4-generic |     16       64      128      256     1024     1420     4096
    ECB enc |  80.05    91.42    93.66    94.77    95.69    95.77    95.86
    ECB dec |  79.98    91.41    93.64    94.76    95.66    95.77    95.85
    CBC enc |  78.55    86.50    88.02    88.77    89.36    89.42    89.48
    CBC dec |  76.82    89.06    91.52    92.77    93.75    93.83    93.96
    CFB enc |  77.64    86.13    87.62    88.42    89.08    88.83    89.18
    CFB dec |  77.57    88.34    90.36    91.45    92.34    92.00    92.44
    CTR enc |  77.80    88.28    90.23    91.22    92.11    91.81    92.25
    CTR dec |  77.83    88.22    90.22    91.22    92.04    91.82    92.28
sm4-neon
    ECB enc |  28.31   112.77   203.03   209.89   215.49   202.11   210.59
    ECB dec |  28.36   113.45   203.23   210.00   215.52   202.13   210.65
    CBC enc |  79.32    87.02    88.51    89.28    89.85    89.89    89.97
    CBC dec |  28.29   112.20   203.30   209.82   214.99   201.51   209.95
    CFB enc |  79.59    87.16    88.54    89.30    89.83    89.62    89.92
    CFB dec |  28.12   111.05   202.47   209.02   214.21   210.90   209.12
    CTR enc |  28.04   108.81   200.62   206.65   211.78   208.78   206.74
    CTR dec |  28.02   108.82   200.45   206.62   211.78   208.74   206.70
sm4-ce-cipher
    ECB enc | 336.79   587.13   682.70   747.37   803.75   811.52   818.06
    ECB dec | 339.18   584.52   679.72   743.68   798.82   803.83   811.54
    CBC enc | 316.63   521.47   597.00   647.14   690.82   695.21   700.55
    CBC dec | 291.80   503.79   585.66   640.82   689.86   695.16   701.72
    CFB enc | 294.79   482.31   552.13   594.71   631.60   628.91   638.92
    CFB dec | 293.09   466.44   526.56   563.17   594.41   592.26   601.97
    CTR enc | 309.61   506.13   576.86   620.47   656.38   654.51   665.10
    CTR dec | 306.69   505.57   576.84   620.18   657.09   654.52   665.32
sm4-ce
    ECB enc | 366.96  1329.81  2024.29  2755.50  3790.07  3861.91  4051.40
    ECB dec | 367.30  1323.93  2018.72  2747.43  3787.39  3862.55  4052.62
    CBC enc | 358.09   682.68   807.24   885.35   958.29   963.60   973.73
    CBC dec | 366.51  1303.63  1978.64  2667.93  3624.53  3683.41  3856.08
    CFB enc | 351.51   681.26   807.81   893.10   968.54   969.17   985.83
    CFB dec | 354.98  1266.61  1929.63  2634.81  3614.23  3611.59  3841.68
    CTR enc | 324.23  1121.25  1689.44  2256.70  2981.90  3007.79  3060.74
    CTR dec | 324.18  1120.44  1694.31  2258.32  2982.01  3010.09  3060.99

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2022-04-08 16:13:29 +08:00

373 lines
9.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 Cipher Algorithm, using ARMv8 Crypto Extensions
* as specified in
* https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
*
* Copyright (C) 2022, Alibaba Group.
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
asmlinkage void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
const u32 *fk, const u32 *ck);
asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_crypt(rkey, dst, src, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_ecb_do_crypt(req, ctx->rkey_enc);
}
static int sm4_ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_ecb_do_crypt(req, ctx->rkey_dec);
}
static int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cfb_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_cfb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cfb_dec(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static int sm4_ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static struct skcipher_alg sm4_algs[] = {
{
.base = {
.cra_name = "ecb(sm4)",
.cra_driver_name = "ecb-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ecb_encrypt,
.decrypt = sm4_ecb_decrypt,
}, {
.base = {
.cra_name = "cbc(sm4)",
.cra_driver_name = "cbc-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cbc_encrypt,
.decrypt = sm4_cbc_decrypt,
}, {
.base = {
.cra_name = "cfb(sm4)",
.cra_driver_name = "cfb-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_cfb_encrypt,
.decrypt = sm4_cfb_decrypt,
}, {
.base = {
.cra_name = "ctr(sm4)",
.cra_driver_name = "ctr-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.setkey = sm4_setkey,
.encrypt = sm4_ctr_crypt,
.decrypt = sm4_ctr_crypt,
}
};
static int __init sm4_init(void)
{
return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
static void __exit sm4_exit(void)
{
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
module_cpu_feature_match(SM4, sm4_init);
module_exit(sm4_exit);
MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("ecb(sm4)");
MODULE_ALIAS_CRYPTO("cbc(sm4)");
MODULE_ALIAS_CRYPTO("cfb(sm4)");
MODULE_ALIAS_CRYPTO("ctr(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");