Merge tag 'v6.2-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Optimise away self-test overhead when they are disabled
   - Support symmetric encryption via keyring keys in af_alg
   - Flip hwrng default_quality, the default is now maximum entropy

  Algorithms:
   - Add library version of aesgcm
   - CFI fixes for assembly code
   - Add arm/arm64 accelerated versions of sm3/sm4

  Drivers:
   - Remove assumption on arm64 that kmalloc is DMA-aligned
   - Fix selftest failures in rockchip
   - Add support for RK3328/RK3399 in rockchip
   - Add deflate support in qat
   - Merge ux500 into stm32
   - Add support for TEE for PCI ID 0x14CA in ccp
   - Add mt7986 support in mtk
   - Add MaxLinear platform support in inside-secure
   - Add NPCM8XX support in npcm"

* tag 'v6.2-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (184 commits)
  crypto: ux500/cryp - delete driver
  crypto: stm32/cryp - enable for use with Ux500
  crypto: stm32 - enable drivers to be used on Ux500
  dt-bindings: crypto: Let STM32 define Ux500 CRYP
  hwrng: geode - Fix PCI device refcount leak
  hwrng: amd - Fix PCI device refcount leak
  crypto: qce - Set DMA alignment explicitly
  crypto: octeontx2 - Set DMA alignment explicitly
  crypto: octeontx - Set DMA alignment explicitly
  crypto: keembay - Set DMA alignment explicitly
  crypto: safexcel - Set DMA alignment explicitly
  crypto: hisilicon/hpre - Set DMA alignment explicitly
  crypto: chelsio - Set DMA alignment explicitly
  crypto: ccree - Set DMA alignment explicitly
  crypto: ccp - Set DMA alignment explicitly
  crypto: cavium - Set DMA alignment explicitly
  crypto: img-hash - Fix variable dereferenced before check 'hdev->req'
  crypto: arm64/ghash-ce - use frame_push/pop macros consistently
  crypto: arm64/crct10dif - use frame_push/pop macros consistently
  crypto: arm64/aes-modes - use frame_push/pop macros consistently
  ...
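
For illustration of the "Add library version of aesgcm" item in the changelog above: unlike the asynchronous crypto_aead interface, a library cipher is called directly and synchronously from C. The sketch below is not taken from this merge; the entry-point names, the <crypto/gcm.h> location and the exact signatures are assumptions, so treat it as illustrative only.

```c
#include <crypto/gcm.h>		/* assumed home of the aesgcm_* library API */
#include <linux/string.h>

/*
 * Hedged sketch: seal a buffer in place with AES-128-GCM through a
 * synchronous library interface, with no crypto_aead allocation.  The
 * aesgcm_* identifiers are assumptions about the new API, not code
 * taken from this merge.
 */
static int demo_seal(u8 *buf, int len, const u8 key[16],
		     const u8 iv[GCM_AES_IV_SIZE], u8 tag[16])
{
	struct aesgcm_ctx ctx;
	int err;

	err = aesgcm_expandkey(&ctx, key, 16, 16);	/* key, keylen, taglen */
	if (err)
		return err;

	/* encrypt in place; no associated data in this example */
	aesgcm_encrypt(&ctx, buf, buf, len, NULL, 0, iv, tag);

	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}
```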
Linus Torvalds 2022-12-14 12:31:09 -08:00
commit 64e7003c6b
255 changed files with 13276 additions and 6999 deletions

View File

@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from
other part of the kernel. Note that the .setkey() call might happen
before or after any of these schematics happen, but must not happen
during any of these are in-flight. Please note that calling .init()
followed immediately by .finish() is also a perfectly valid
followed immediately by .final() is also a perfectly valid
transformation.
::
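
For illustration only (not part of this diff): from the caller's side, the init-then-final sequence described here is simply how one digests a zero-length message, e.g. with the shash API:

```c
#include <crypto/hash.h>
#include <linux/err.h>

/*
 * Minimal sketch: digest of an empty message, i.e. .init() immediately
 * followed by .final() on the driver side.  The digest buffer must hold
 * SHA256_DIGEST_SIZE bytes.
 */
static int hash_empty_message(u8 *digest)
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
	int err;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_init(desc) ?:
		      crypto_shash_final(desc, digest);
	}

	crypto_free_shash(tfm);
	return err;
}
```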

View File

@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message
digest, the flag MSG_TRUNC is set by the kernel.
In order to set a message digest key, the calling application must use
the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
operation is performed without the initial HMAC state change caused by
the key.
the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the
key is not set the HMAC operation is performed without the initial HMAC state
change caused by the key.
Symmetric Cipher API
--------------------
@ -382,6 +382,15 @@ mentioned optname:
- the RNG cipher type to provide the seed
- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t.
This operation behaves the same as ALG_SET_KEY. The decrypted
data is copied from a keyring key, and uses that data as the
key for symmetric encryption.
The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH
permission set, otherwise -EPERM is returned. Supports key types: user,
logon, encrypted, and trusted.
- ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for
AEAD ciphers. For a encryption operation, the authentication tag of
the given size will be generated. For a decryption operation, the
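
The new option can be exercised from userspace in the usual AF_ALG way. The following is a hedged sketch, not part of this documentation patch: it assumes linux/if_alg.h exposes the new ALG_SET_KEY_BY_KEY_SERIAL constant and that a "user" key named "af_alg_demo" already exists in a keyring the caller may search (error handling omitted; link with -lkeyutils).

```c
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>
#include <keyutils.h>		/* request_key(), key_serial_t */

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "cbc(aes)",
	};
	/*
	 * The key must grant KEY_(POS|USR|GRP|OTH)_SEARCH to the caller,
	 * otherwise the setsockopt() below fails with EPERM.
	 */
	key_serial_t key = request_key("user", "af_alg_demo", NULL,
				       KEY_SPEC_PROCESS_KEYRING);

	int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));

	/*
	 * Instead of handing the kernel raw key material with ALG_SET_KEY,
	 * pass the key_serial_t of the keyring key; the kernel looks the key
	 * up and uses its payload as the symmetric key.
	 */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY_BY_KEY_SERIAL,
		   &key, sizeof(key));

	int opfd = accept(tfmfd, NULL, 0);

	/*
	 * ... issue ALG_SET_OP/ALG_SET_IV via sendmsg() and read back the
	 * ciphertext, exactly as with ALG_SET_KEY ...
	 */

	close(opfd);
	close(tfmfd);
	return 0;
}
```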

View File

@ -0,0 +1,127 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Rockchip Electronics Security Accelerator
maintainers:
- Heiko Stuebner <heiko@sntech.de>
properties:
compatible:
enum:
- rockchip,rk3288-crypto
- rockchip,rk3328-crypto
- rockchip,rk3399-crypto
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
minItems: 3
maxItems: 4
clock-names:
minItems: 3
maxItems: 4
resets:
minItems: 1
maxItems: 3
reset-names:
minItems: 1
maxItems: 3
allOf:
- if:
properties:
compatible:
contains:
const: rockchip,rk3288-crypto
then:
properties:
clocks:
minItems: 4
clock-names:
items:
- const: aclk
- const: hclk
- const: sclk
- const: apb_pclk
resets:
maxItems: 1
reset-names:
items:
- const: crypto-rst
- if:
properties:
compatible:
contains:
const: rockchip,rk3328-crypto
then:
properties:
clocks:
maxItems: 3
clock-names:
items:
- const: hclk_master
- const: hclk_slave
- const: sclk
resets:
maxItems: 1
reset-names:
items:
- const: crypto-rst
- if:
properties:
compatible:
contains:
const: rockchip,rk3399-crypto
then:
properties:
clocks:
maxItems: 3
clock-names:
items:
- const: hclk_master
- const: hclk_slave
- const: sclk
resets:
minItems: 3
reset-names:
items:
- const: master
- const: slave
- const: crypto-rst
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
- resets
- reset-names
additionalProperties: false
examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/rk3288-cru.h>
crypto@ff8a0000 {
compatible = "rockchip,rk3288-crypto";
reg = <0xff8a0000 0x4000>;
interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
<&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
clock-names = "aclk", "hclk", "sclk", "apb_pclk";
resets = <&cru SRST_CRYPTO>;
reset-names = "crypto-rst";
};

View File

@ -1,28 +0,0 @@
Rockchip Electronics And Security Accelerator
Required properties:
- compatible: Should be "rockchip,rk3288-crypto"
- reg: Base physical address of the engine and length of memory mapped
region
- interrupts: Interrupt number
- clocks: Reference to the clocks about crypto
- clock-names: "aclk" used to clock data
"hclk" used to clock data
"sclk" used to clock crypto accelerator
"apb_pclk" used to clock dma
- resets: Must contain an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names: Must include the name "crypto-rst".
Examples:
crypto: cypto-controller@ff8a0000 {
compatible = "rockchip,rk3288-crypto";
reg = <0xff8a0000 0x4000>;
interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
<&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
clock-names = "aclk", "hclk", "sclk", "apb_pclk";
resets = <&cru SRST_CRYPTO>;
reset-names = "crypto-rst";
};

View File

@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: STMicroelectronics STM32 CRYP bindings
description: The STM32 CRYP block is built on the CRYP block found in
the STn8820 SoC introduced in 2007, and subsequently used in the U8500
SoC in 2010.
maintainers:
- Lionel Debieve <lionel.debieve@foss.st.com>
properties:
compatible:
enum:
- st,stn8820-cryp
- stericsson,ux500-cryp
- st,stm32f756-cryp
- st,stm32mp1-cryp
@ -27,6 +33,19 @@ properties:
resets:
maxItems: 1
dmas:
items:
- description: mem2cryp DMA channel
- description: cryp2mem DMA channel
dma-names:
items:
- const: mem2cryp
- const: cryp2mem
power-domains:
maxItems: 1
required:
- compatible
- reg

View File

@ -16,7 +16,9 @@ maintainers:
properties:
compatible:
const: nuvoton,npcm750-rng
enum:
- nuvoton,npcm750-rng
- nuvoton,npcm845-rng
reg:
maxItems: 1

View File

@ -17941,6 +17941,13 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat*
F: drivers/hid/hid-roccat*
F: include/linux/hid-roccat*
ROCKCHIP CRYPTO DRIVERS
M: Corentin Labbe <clabbe@baylibre.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
F: drivers/crypto/rockchip/
ROCKCHIP I2S TDM DRIVER
M: Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
L: linux-rockchip@lists.infradead.org

View File

@ -18,7 +18,7 @@ config CRYPTO_GHASH_ARM_CE
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
select CRYPTO_GF128MUL
select CRYPTO_LIB_GF128MUL
help
GCM GHASH function (NIST SP800-38D)

View File

@ -7,7 +7,7 @@
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/module.h>
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);

View File

@ -69,7 +69,7 @@
/*
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
* u8 hash[NH_HASH_BYTES])
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/

View File

@ -14,14 +14,7 @@
#include <linux/module.h>
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES])
{
nh_neon(key, message, message_len, (u8 *)hash);
}
__le64 hash[NH_NUM_PASSES]);
static int nhpoly1305_neon_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
kernel_neon_end();
src += n;
srclen -= n;

View File

@ -6,8 +6,8 @@ config CRYPTO_GHASH_ARM64_CE
tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_GF128MUL
select CRYPTO_LIB_AES
select CRYPTO_LIB_GF128MUL
select CRYPTO_AEAD
help
GCM GHASH function (NIST SP800-38D)
@ -96,6 +96,17 @@ config CRYPTO_SHA3_ARM64
Architecture: arm64 using:
- ARMv8.2 Crypto Extensions
config CRYPTO_SM3_NEON
tristate "Hash functions: SM3 (NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_SM3
help
SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
Architecture: arm64 using:
- NEON (Advanced SIMD) extensions
config CRYPTO_SM3_ARM64_CE
tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
@ -220,7 +231,7 @@ config CRYPTO_SM4_ARM64_CE
- NEON (Advanced SIMD) extensions
config CRYPTO_SM4_ARM64_CE_BLK
tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)"
tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR/XTS (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_SM4
@ -231,6 +242,8 @@ config CRYPTO_SM4_ARM64_CE_BLK
- CBC (Cipher Block Chaining) mode (NIST SP800-38A)
- CFB (Cipher Feedback) mode (NIST SP800-38A)
- CTR (Counter) mode (NIST SP800-38A)
- XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
and IEEE 1619)
Architecture: arm64 using:
- ARMv8 Crypto Extensions
@ -268,6 +281,38 @@ config CRYPTO_AES_ARM64_CE_CCM
- ARMv8 Crypto Extensions
- NEON (Advanced SIMD) extensions
config CRYPTO_SM4_ARM64_CE_CCM
tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AEAD
select CRYPTO_SM4
select CRYPTO_SM4_ARM64_CE_BLK
help
AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
CCM (Counter with Cipher Block Chaining-Message Authentication Code)
authenticated encryption mode (NIST SP800-38C)
Architecture: arm64 using:
- ARMv8 Crypto Extensions
- NEON (Advanced SIMD) extensions
config CRYPTO_SM4_ARM64_CE_GCM
tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AEAD
select CRYPTO_SM4
select CRYPTO_SM4_ARM64_CE_BLK
help
AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
Architecture: arm64 using:
- ARMv8 Crypto Extensions
- PMULL (Polynomial Multiply Long) instructions
- NEON (Advanced SIMD) extensions
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF (PMULL)"
depends on KERNEL_MODE_NEON && CRC_T10DIF

View File

@ -17,6 +17,9 @@ sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
@ -26,6 +29,12 @@ sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o
sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o

View File

@ -9,9 +9,9 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include "aes-ce-setkey.h"

View File

@ -6,7 +6,7 @@
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/module.h>
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);

View File

@ -52,8 +52,7 @@ SYM_FUNC_END(aes_decrypt_block5x)
*/
AES_FUNC_START(aes_ecb_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
enc_prepare w3, x2, x5
@ -77,14 +76,13 @@ ST5( st1 {v4.16b}, [x0], #16 )
subs w4, w4, #1
bne .Lecbencloop
.Lecbencout:
ldp x29, x30, [sp], #16
frame_pop
ret
AES_FUNC_END(aes_ecb_encrypt)
AES_FUNC_START(aes_ecb_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
dec_prepare w3, x2, x5
@ -108,7 +106,7 @@ ST5( st1 {v4.16b}, [x0], #16 )
subs w4, w4, #1
bne .Lecbdecloop
.Lecbdecout:
ldp x29, x30, [sp], #16
frame_pop
ret
AES_FUNC_END(aes_ecb_decrypt)
@ -171,9 +169,6 @@ AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)
AES_FUNC_START(aes_essiv_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
ld1 {cbciv.16b}, [x5] /* get iv */
mov w8, #14 /* AES-256: 14 rounds */
@ -182,11 +177,9 @@ AES_FUNC_START(aes_essiv_cbc_decrypt)
b .Lessivcbcdecstart
AES_FUNC_START(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
ld1 {cbciv.16b}, [x5] /* get iv */
.Lessivcbcdecstart:
frame_push 0
dec_prepare w3, x2, x6
.LcbcdecloopNx:
@ -236,7 +229,7 @@ ST5( st1 {v4.16b}, [x0], #16 )
bne .Lcbcdecloop
.Lcbcdecout:
st1 {cbciv.16b}, [x5] /* return iv */
ldp x29, x30, [sp], #16
frame_pop
ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)
@ -337,8 +330,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
BLOCKS .req x13
BLOCKS_W .req w13
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
enc_prepare ROUNDS_W, KEY, IV_PART
ld1 {vctr.16b}, [IV]
@ -481,7 +473,7 @@ ST5( st1 {v4.16b}, [OUT], #16 )
.if !\xctr
st1 {vctr.16b}, [IV] /* return next CTR value */
.endif
ldp x29, x30, [sp], #16
frame_pop
ret
.Lctrtail\xctr:
@ -645,8 +637,7 @@ AES_FUNC_END(aes_xctr_encrypt)
.endm
AES_FUNC_START(aes_xts_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
ld1 {v4.16b}, [x6]
xts_load_mask v8
@ -704,7 +695,7 @@ AES_FUNC_START(aes_xts_encrypt)
st1 {v0.16b}, [x0]
.Lxtsencret:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
frame_pop
ret
.LxtsencctsNx:
@ -732,8 +723,7 @@ AES_FUNC_START(aes_xts_encrypt)
AES_FUNC_END(aes_xts_encrypt)
AES_FUNC_START(aes_xts_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
/* subtract 16 bytes if we are doing CTS */
sub w8, w4, #0x10
@ -794,7 +784,7 @@ AES_FUNC_START(aes_xts_decrypt)
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
frame_pop
ret
.Lxtsdeccts:

View File

@ -760,7 +760,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
eor v6.16b, v6.16b, v31.16b
eor v7.16b, v7.16b, v16.16b
stp q16, q17, [sp, #16]
stp q16, q17, [x6]
mov bskey, x2
mov rounds, x3
@ -768,8 +768,8 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
SYM_FUNC_END(__xts_crypt8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
stp x29, x30, [sp, #-48]!
mov x29, sp
frame_push 0, 32
add x6, sp, #.Lframe_local_offset
ld1 {v25.16b}, [x5]
@ -781,7 +781,7 @@ SYM_FUNC_END(__xts_crypt8)
eor v18.16b, \o2\().16b, v27.16b
eor v19.16b, \o3\().16b, v28.16b
ldp q24, q25, [sp, #16]
ldp q24, q25, [x6]
eor v20.16b, \o4\().16b, v29.16b
eor v21.16b, \o5\().16b, v30.16b
@ -795,7 +795,7 @@ SYM_FUNC_END(__xts_crypt8)
b.gt 0b
st1 {v25.16b}, [x5]
ldp x29, x30, [sp], #48
frame_pop
ret
.endm
@ -820,9 +820,7 @@ SYM_FUNC_END(aesbs_xts_decrypt)
* int rounds, int blocks, u8 iv[])
*/
SYM_FUNC_START(aesbs_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 0
ldp x7, x8, [x5]
ld1 {v0.16b}, [x5]
CPU_LE( rev x7, x7 )
@ -862,6 +860,6 @@ CPU_LE( rev x8, x8 )
b.gt 0b
st1 {v0.16b}, [x5]
ldp x29, x30, [sp], #16
frame_pop
ret
SYM_FUNC_END(aesbs_ctr_encrypt)

View File

@ -429,7 +429,7 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
umov w0, v0.h[0]
.ifc \p, p8
ldp x29, x30, [sp], #16
frame_pop
.endif
ret
@ -466,8 +466,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p8)
stp x29, x30, [sp, #-16]!
mov x29, sp
frame_push 1
crc_t10dif_pmull p8
SYM_FUNC_END(crc_t10dif_pmull_p8)

View File

@ -436,9 +436,7 @@ SYM_FUNC_END(pmull_ghash_update_p8)
.align 6
.macro pmull_gcm_do_crypt, enc
stp x29, x30, [sp, #-32]!
mov x29, sp
str x19, [sp, #24]
frame_push 1
load_round_keys x7, x6, x8
@ -529,7 +527,7 @@ CPU_LE( rev w8, w8 )
.endif
bne 0b
3: ldp x19, x10, [sp, #24]
3: ldr x10, [sp, #.Lframe_local_offset]
cbz x10, 5f // output tag?
ld1 {INP3.16b}, [x10] // load lengths[]
@ -562,7 +560,7 @@ CPU_LE( rev w8, w8 )
smov w0, v0.b[0] // return b0
.endif
4: ldp x29, x30, [sp], #32
4: frame_pop
ret
5:

View File

@ -508,7 +508,7 @@ static void __exit ghash_ce_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
static const struct cpu_feature ghash_cpu_feature[] = {
static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

View File

@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
KEY .req x0
MESSAGE .req x1
@ -58,11 +59,11 @@
/*
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
* u8 hash[NH_HASH_BYTES])
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
SYM_FUNC_START(nh_neon)
SYM_TYPED_FUNC_START(nh_neon)
ld1 {K0.4s,K1.4s}, [KEY], #32
movi PASS0_SUMS.2d, #0

View File

@ -14,14 +14,7 @@
#include <linux/module.h>
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES])
{
nh_neon(key, message, message_len, (u8 *)hash);
}
__le64 hash[NH_NUM_PASSES]);
static int nhpoly1305_neon_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
kernel_neon_end();
src += n;
srclen -= n;

View File

@ -84,7 +84,7 @@ static struct shash_alg sm3_alg = {
.base.cra_driver_name = "sm3-ce",
.base.cra_blocksize = SM3_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
.base.cra_priority = 400,
};
static int __init sm3_ce_mod_init(void)

View File

@ -0,0 +1,601 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* sm3-neon-core.S - SM3 secure hash using NEON instructions
*
* Linux/arm64 port of the libgcrypt SM3 implementation for AArch64
*
* Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
/* Context structure */
#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16
#define state_h5 20
#define state_h6 24
#define state_h7 28
/* Stack structure */
#define STACK_W_SIZE (32 * 2 * 3)
#define STACK_W (0)
#define STACK_SIZE (STACK_W + STACK_W_SIZE)
/* Register macros */
#define RSTATE x0
#define RDATA x1
#define RNBLKS x2
#define RKPTR x28
#define RFRAME x29
#define ra w3
#define rb w4
#define rc w5
#define rd w6
#define re w7
#define rf w8
#define rg w9
#define rh w10
#define t0 w11
#define t1 w12
#define t2 w13
#define t3 w14
#define t4 w15
#define t5 w16
#define t6 w17
#define k_even w19
#define k_odd w20
#define addr0 x21
#define addr1 x22
#define s0 w23
#define s1 w24
#define s2 w25
#define s3 w26
#define W0 v0
#define W1 v1
#define W2 v2
#define W3 v3
#define W4 v4
#define W5 v5
#define XTMP0 v6
#define XTMP1 v7
#define XTMP2 v16
#define XTMP3 v17
#define XTMP4 v18
#define XTMP5 v19
#define XTMP6 v20
/* Helper macros. */
#define _(...) /*_*/
#define clear_vec(x) \
movi x.8h, #0;
#define rolw(o, a, n) \
ror o, a, #(32 - n);
/* Round function macros. */
#define GG1_1(x, y, z, o, t) \
eor o, x, y;
#define GG1_2(x, y, z, o, t) \
eor o, o, z;
#define GG1_3(x, y, z, o, t)
#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
#define FF1_2(x, y, z, o, t)
#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)
#define GG2_1(x, y, z, o, t) \
bic o, z, x;
#define GG2_2(x, y, z, o, t) \
and t, y, x;
#define GG2_3(x, y, z, o, t) \
eor o, o, t;
#define FF2_1(x, y, z, o, t) \
eor o, x, y;
#define FF2_2(x, y, z, o, t) \
and t, x, y; \
and o, o, z;
#define FF2_3(x, y, z, o, t) \
eor o, o, t;
#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
K_LOAD(round); \
ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \
rolw(t0, a, 12); /* rol(a, 12) => t0 */ \
IOP(1, iop_param); \
FF##i##_1(a, b, c, t1, t2); \
ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \
add k, k, e; \
IOP(2, iop_param); \
GG##i##_1(e, f, g, t3, t4); \
FF##i##_2(a, b, c, t1, t2); \
IOP(3, iop_param); \
add k, k, t0; \
add h, h, t5; \
add d, d, t6; /* w1w2 + d => d */ \
IOP(4, iop_param); \
rolw(k, k, 7); /* rol (t0 + e + t), 7) => k */ \
GG##i##_2(e, f, g, t3, t4); \
add h, h, k; /* h + w1 + k => h */ \
IOP(5, iop_param); \
FF##i##_3(a, b, c, t1, t2); \
eor t0, t0, k; /* k ^ t0 => t0 */ \
GG##i##_3(e, f, g, t3, t4); \
add d, d, t1; /* FF(a,b,c) + d => d */ \
IOP(6, iop_param); \
add t3, t3, h; /* GG(e,f,g) + h => t3 */ \
rolw(b, b, 9); /* rol(b, 9) => b */ \
eor h, t3, t3, ror #(32-9); \
IOP(7, iop_param); \
add d, d, t0; /* t0 + d => d */ \
rolw(f, f, 19); /* rol(f, 19) => f */ \
IOP(8, iop_param); \
eor h, h, t3, ror #(32-17); /* P0(t3) => h */
#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
#define KL(round) \
ldp k_even, k_odd, [RKPTR, #(4*(round))];
/* Input expansion macros. */
/* Byte-swapped input address. */
#define IW_W_ADDR(round, widx, offs) \
(STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))
/* Expanded input address. */
#define XW_W_ADDR(round, widx, offs) \
(STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
/* Rounds 1-12, byte-swapped input block addresses. */
#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)
/* Rounds 1-12, expanded input block addresses. */
#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0)
#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)
/* Input block loading.
* Interleaving within round function needed for in-order CPUs. */
#define LOAD_W_VEC_1_1() \
add addr0, sp, #IW_W1_ADDR(0, 0);
#define LOAD_W_VEC_1_2() \
add addr1, sp, #IW_W1_ADDR(4, 0);
#define LOAD_W_VEC_1_3() \
ld1 {W0.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_4() \
ld1 {W1.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_5() \
ld1 {W2.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_6() \
ld1 {W3.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_7() \
rev32 XTMP0.16b, W0.16b;
#define LOAD_W_VEC_1_8() \
rev32 XTMP1.16b, W1.16b;
#define LOAD_W_VEC_2_1() \
rev32 XTMP2.16b, W2.16b;
#define LOAD_W_VEC_2_2() \
rev32 XTMP3.16b, W3.16b;
#define LOAD_W_VEC_2_3() \
eor XTMP4.16b, XTMP1.16b, XTMP0.16b;
#define LOAD_W_VEC_2_4() \
eor XTMP5.16b, XTMP2.16b, XTMP1.16b;
#define LOAD_W_VEC_2_5() \
st1 {XTMP0.16b}, [addr0], #16;
#define LOAD_W_VEC_2_6() \
st1 {XTMP4.16b}, [addr0]; \
add addr0, sp, #IW_W1_ADDR(8, 0);
#define LOAD_W_VEC_2_7() \
eor XTMP6.16b, XTMP3.16b, XTMP2.16b;
#define LOAD_W_VEC_2_8() \
ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */
#define LOAD_W_VEC_3_1() \
mov W2.16b, XTMP1.16b; /* W2: xx, w6, w5, w4 */
#define LOAD_W_VEC_3_2() \
st1 {XTMP1.16b}, [addr1], #16;
#define LOAD_W_VEC_3_3() \
st1 {XTMP5.16b}, [addr1]; \
ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */
#define LOAD_W_VEC_3_4() \
ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
#define LOAD_W_VEC_3_5() \
ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */
#define LOAD_W_VEC_3_6() \
st1 {XTMP2.16b}, [addr0], #16;
#define LOAD_W_VEC_3_7() \
st1 {XTMP6.16b}, [addr0];
#define LOAD_W_VEC_3_8() \
ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */
#define LOAD_W_VEC_1(iop_num, ...) \
LOAD_W_VEC_1_##iop_num()
#define LOAD_W_VEC_2(iop_num, ...) \
LOAD_W_VEC_2_##iop_num()
#define LOAD_W_VEC_3(iop_num, ...) \
LOAD_W_VEC_3_##iop_num()
/* Message scheduling. Note: 3 words per vector register.
* Interleaving within round function needed for in-order CPUs. */
#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
/* Load (w[i - 16]) => XTMP0 */ \
/* Load (w[i - 13]) => XTMP5 */ \
ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */
#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
ext XTMP5.16b, w1.16b, w1.16b, #12;
#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
ext XTMP5.16b, XTMP5.16b, w2.16b, #12;
#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
/* w[i - 9] == w3 */ \
/* W3 ^ XTMP0 => XTMP0 */ \
eor XTMP0.16b, XTMP0.16b, w3.16b;
#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
/* w[i - 3] == w5 */ \
/* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \
/* rol(XTMP5, 7) => XTMP1 */ \
add addr0, sp, #XW_W1_ADDR((round), 0); \
shl XTMP2.4s, w5.4s, #15;
#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
shl XTMP1.4s, XTMP5.4s, #7;
#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
sri XTMP2.4s, w5.4s, #(32-15);
#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
sri XTMP1.4s, XTMP5.4s, #(32-7);
#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
eor XTMP0.16b, XTMP0.16b, XTMP2.16b;
#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
/* w[i - 6] == W4 */ \
/* W4 ^ XTMP1 => XTMP1 */ \
eor XTMP1.16b, XTMP1.16b, w4.16b;
#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
/* P1(XTMP0) ^ XTMP1 => W0 */ \
shl XTMP3.4s, XTMP0.4s, #15;
#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
shl XTMP4.4s, XTMP0.4s, #23;
#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
eor w0.16b, XTMP1.16b, XTMP0.16b;
#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
sri XTMP3.4s, XTMP0.4s, #(32-15);
#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
sri XTMP4.4s, XTMP0.4s, #(32-23);
#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
eor w0.16b, w0.16b, XTMP3.16b;
#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
/* Load (w[i - 3]) => XTMP2 */ \
ext XTMP2.16b, w4.16b, w4.16b, #12;
#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
eor w0.16b, w0.16b, XTMP4.16b;
#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
ext XTMP2.16b, XTMP2.16b, w5.16b, #12;
#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
/* W1 ^ W2 => XTMP3 */ \
eor XTMP3.16b, XTMP2.16b, w0.16b;
#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
st1 {XTMP2.16b-XTMP3.16b}, [addr0];
#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)
#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
/*
* Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
*
* void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
* int blocks)
*/
.text
.align 3
SYM_TYPED_FUNC_START(sm3_neon_transform)
ldp ra, rb, [RSTATE, #0]
ldp rc, rd, [RSTATE, #8]
ldp re, rf, [RSTATE, #16]
ldp rg, rh, [RSTATE, #24]
stp x28, x29, [sp, #-16]!
stp x19, x20, [sp, #-16]!
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
stp x25, x26, [sp, #-16]!
mov RFRAME, sp
sub addr0, sp, #STACK_SIZE
adr_l RKPTR, .LKtable
and sp, addr0, #(~63)
/* Preload first block. */
LOAD_W_VEC_1(1, 0)
LOAD_W_VEC_1(2, 0)
LOAD_W_VEC_1(3, 0)
LOAD_W_VEC_1(4, 0)
LOAD_W_VEC_1(5, 0)
LOAD_W_VEC_1(6, 0)
LOAD_W_VEC_1(7, 0)
LOAD_W_VEC_1(8, 0)
LOAD_W_VEC_2(1, 0)
LOAD_W_VEC_2(2, 0)
LOAD_W_VEC_2(3, 0)
LOAD_W_VEC_2(4, 0)
LOAD_W_VEC_2(5, 0)
LOAD_W_VEC_2(6, 0)
LOAD_W_VEC_2(7, 0)
LOAD_W_VEC_2(8, 0)
LOAD_W_VEC_3(1, 0)
LOAD_W_VEC_3(2, 0)
LOAD_W_VEC_3(3, 0)
LOAD_W_VEC_3(4, 0)
LOAD_W_VEC_3(5, 0)
LOAD_W_VEC_3(6, 0)
LOAD_W_VEC_3(7, 0)
LOAD_W_VEC_3(8, 0)
.balign 16
.Loop:
/* Transform 0-3 */
R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0)
R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0)
/* Transform 4-7 + Precalc 12-14 */
R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0)
R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)
/* Transform 8-11 + Precalc 12-17 */
R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)
/* Transform 12-14 + Precalc 18-20 */
R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)
/* Transform 15-17 + Precalc 21-23 */
R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)
/* Transform 18-20 + Precalc 24-26 */
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)
/* Transform 21-23 + Precalc 27-29 */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)
/* Transform 24-26 + Precalc 30-32 */
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)
/* Transform 27-29 + Precalc 33-35 */
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)
/* Transform 30-32 + Precalc 36-38 */
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)
/* Transform 33-35 + Precalc 39-41 */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)
/* Transform 36-38 + Precalc 42-44 */
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)
/* Transform 39-41 + Precalc 45-47 */
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)
/* Transform 42-44 + Precalc 48-50 */
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)
/* Transform 45-47 + Precalc 51-53 */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)
/* Transform 48-50 + Precalc 54-56 */
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)
/* Transform 51-53 + Precalc 57-59 */
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)
/* Transform 54-56 + Precalc 60-62 */
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)
/* Transform 57-59 + Precalc 63 */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)
/* Transform 60 */
R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
subs RNBLKS, RNBLKS, #1
b.eq .Lend
/* Transform 61-63 + Preload next block */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _)
ldp s0, s1, [RSTATE, #0]
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
ldp s2, s3, [RSTATE, #8]
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _)
/* Update the chaining variables. */
eor ra, ra, s0
eor rb, rb, s1
ldp s0, s1, [RSTATE, #16]
eor rc, rc, s2
ldp k_even, k_odd, [RSTATE, #24]
eor rd, rd, s3
eor re, re, s0
stp ra, rb, [RSTATE, #0]
eor rf, rf, s1
stp rc, rd, [RSTATE, #8]
eor rg, rg, k_even
stp re, rf, [RSTATE, #16]
eor rh, rh, k_odd
stp rg, rh, [RSTATE, #24]
b .Loop
.Lend:
/* Transform 61-63 */
R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _)
ldp s0, s1, [RSTATE, #0]
R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
ldp s2, s3, [RSTATE, #8]
R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _)
/* Update the chaining variables. */
eor ra, ra, s0
clear_vec(W0)
eor rb, rb, s1
clear_vec(W1)
ldp s0, s1, [RSTATE, #16]
clear_vec(W2)
eor rc, rc, s2
clear_vec(W3)
ldp k_even, k_odd, [RSTATE, #24]
clear_vec(W4)
eor rd, rd, s3
clear_vec(W5)
eor re, re, s0
clear_vec(XTMP0)
stp ra, rb, [RSTATE, #0]
clear_vec(XTMP1)
eor rf, rf, s1
clear_vec(XTMP2)
stp rc, rd, [RSTATE, #8]
clear_vec(XTMP3)
eor rg, rg, k_even
clear_vec(XTMP4)
stp re, rf, [RSTATE, #16]
clear_vec(XTMP5)
eor rh, rh, k_odd
clear_vec(XTMP6)
stp rg, rh, [RSTATE, #24]
/* Clear message expansion area */
add addr0, sp, #STACK_W
st1 {W0.16b-W3.16b}, [addr0], #64
st1 {W0.16b-W3.16b}, [addr0], #64
st1 {W0.16b-W3.16b}, [addr0]
mov sp, RFRAME
ldp x25, x26, [sp], #16
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
ldp x19, x20, [sp], #16
ldp x28, x29, [sp], #16
ret
SYM_FUNC_END(sm3_neon_transform)
.section ".rodata", "a"
.align 4
.LKtable:
.long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
.long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
.long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
.long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
.long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
.long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
.long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
.long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5

View File

@ -0,0 +1,103 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* sm3-neon-glue.c - SM3 secure hash using NEON instructions
*
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
int blocks);
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
if (!crypto_simd_usable()) {
sm3_update(shash_desc_ctx(desc), data, len);
return 0;
}
kernel_neon_begin();
sm3_base_do_update(desc, data, len, sm3_neon_transform);
kernel_neon_end();
return 0;
}
static int sm3_neon_final(struct shash_desc *desc, u8 *out)
{
if (!crypto_simd_usable()) {
sm3_final(shash_desc_ctx(desc), out);
return 0;
}
kernel_neon_begin();
sm3_base_do_finalize(desc, sm3_neon_transform);
kernel_neon_end();
return sm3_base_finish(desc, out);
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!crypto_simd_usable()) {
struct sm3_state *sctx = shash_desc_ctx(desc);
if (len)
sm3_update(sctx, data, len);
sm3_final(sctx, out);
return 0;
}
kernel_neon_begin();
if (len)
sm3_base_do_update(desc, data, len, sm3_neon_transform);
sm3_base_do_finalize(desc, sm3_neon_transform);
kernel_neon_end();
return sm3_base_finish(desc, out);
}
static struct shash_alg sm3_alg = {
.digestsize = SM3_DIGEST_SIZE,
.init = sm3_base_init,
.update = sm3_neon_update,
.final = sm3_neon_final,
.finup = sm3_neon_finup,
.descsize = sizeof(struct sm3_state),
.base.cra_name = "sm3",
.base.cra_driver_name = "sm3-neon",
.base.cra_blocksize = SM3_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
.base.cra_priority = 200,
};
static int __init sm3_neon_init(void)
{
return crypto_register_shash(&sm3_alg);
}
static void __exit sm3_neon_fini(void)
{
crypto_unregister_shash(&sm3_alg);
}
module_init(sm3_neon_init);
module_exit(sm3_neon_fini);
MODULE_DESCRIPTION("SM3 secure hash using NEON instructions");
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");

View File

@ -0,0 +1,209 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 helper macros for Crypto Extensions
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#define SM4_PREPARE(ptr) \
ld1 {v24.16b-v27.16b}, [ptr], #64; \
ld1 {v28.16b-v31.16b}, [ptr];
#define SM4_CRYPT_BLK_BE(b0) \
sm4e b0.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
rev32 b0.16b, b0.16b;
#define SM4_CRYPT_BLK(b0) \
rev32 b0.16b, b0.16b; \
SM4_CRYPT_BLK_BE(b0);
#define SM4_CRYPT_BLK2_BE(b0, b1) \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
#define SM4_CRYPT_BLK2(b0, b1) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
SM4_CRYPT_BLK2_BE(b0, b1);
#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b2.4s, v24.4s; \
sm4e b3.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b2.4s, v25.4s; \
sm4e b3.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b2.4s, v26.4s; \
sm4e b3.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b2.4s, v27.4s; \
sm4e b3.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b2.4s, v28.4s; \
sm4e b3.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b2.4s, v29.4s; \
sm4e b3.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b2.4s, v30.4s; \
sm4e b3.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
sm4e b2.4s, v31.4s; \
sm4e b3.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
rev64 b2.4s, b2.4s; \
rev64 b3.4s, b3.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
ext b2.16b, b2.16b, b2.16b, #8; \
ext b3.16b, b3.16b, b3.16b, #8; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b;
#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7) \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b2.4s, v24.4s; \
sm4e b3.4s, v24.4s; \
sm4e b4.4s, v24.4s; \
sm4e b5.4s, v24.4s; \
sm4e b6.4s, v24.4s; \
sm4e b7.4s, v24.4s; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b2.4s, v25.4s; \
sm4e b3.4s, v25.4s; \
sm4e b4.4s, v25.4s; \
sm4e b5.4s, v25.4s; \
sm4e b6.4s, v25.4s; \
sm4e b7.4s, v25.4s; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b2.4s, v26.4s; \
sm4e b3.4s, v26.4s; \
sm4e b4.4s, v26.4s; \
sm4e b5.4s, v26.4s; \
sm4e b6.4s, v26.4s; \
sm4e b7.4s, v26.4s; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b2.4s, v27.4s; \
sm4e b3.4s, v27.4s; \
sm4e b4.4s, v27.4s; \
sm4e b5.4s, v27.4s; \
sm4e b6.4s, v27.4s; \
sm4e b7.4s, v27.4s; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b2.4s, v28.4s; \
sm4e b3.4s, v28.4s; \
sm4e b4.4s, v28.4s; \
sm4e b5.4s, v28.4s; \
sm4e b6.4s, v28.4s; \
sm4e b7.4s, v28.4s; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b2.4s, v29.4s; \
sm4e b3.4s, v29.4s; \
sm4e b4.4s, v29.4s; \
sm4e b5.4s, v29.4s; \
sm4e b6.4s, v29.4s; \
sm4e b7.4s, v29.4s; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b2.4s, v30.4s; \
sm4e b3.4s, v30.4s; \
sm4e b4.4s, v30.4s; \
sm4e b5.4s, v30.4s; \
sm4e b6.4s, v30.4s; \
sm4e b7.4s, v30.4s; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
sm4e b2.4s, v31.4s; \
sm4e b3.4s, v31.4s; \
sm4e b4.4s, v31.4s; \
sm4e b5.4s, v31.4s; \
sm4e b6.4s, v31.4s; \
sm4e b7.4s, v31.4s; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
rev64 b2.4s, b2.4s; \
rev64 b3.4s, b3.4s; \
rev64 b4.4s, b4.4s; \
rev64 b5.4s, b5.4s; \
rev64 b6.4s, b6.4s; \
rev64 b7.4s, b7.4s; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
ext b2.16b, b2.16b, b2.16b, #8; \
ext b3.16b, b3.16b, b3.16b, #8; \
ext b4.16b, b4.16b, b4.16b, #8; \
ext b5.16b, b5.16b, b5.16b, #8; \
ext b6.16b, b6.16b, b6.16b, #8; \
ext b7.16b, b7.16b, b7.16b, #8; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b;
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
rev32 b4.16b, b4.16b; \
rev32 b5.16b, b5.16b; \
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7);

View File

@ -0,0 +1,328 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
* as specified in rfc8998
* https://datatracker.ietf.org/doc/html/rfc8998
*
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"
.arch armv8-a+crypto
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
.set .Lv\b\().4s, \b
.endr
.macro sm4e, vd, vn
.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
/* Register macros */
#define RMAC v16
/* Helper macros. */
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
rev64 vctr.16b, vctr.16b; \
adc x7, x7, xzr;
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
/* input:
* x0: round key array, CTX
* x1: mac
* x2: src
* w3: nblocks
*/
SM4_PREPARE(x0)
ld1 {RMAC.16b}, [x1]
.Lcbcmac_loop_4x:
cmp w3, #4
blt .Lcbcmac_loop_1x
sub w3, w3, #4
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v1.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v2.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v3.16b
cbz w3, .Lcbcmac_end
b .Lcbcmac_loop_4x
.Lcbcmac_loop_1x:
sub w3, w3, #1
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v0.16b
cbnz w3, .Lcbcmac_loop_1x
.Lcbcmac_end:
st1 {RMAC.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
/* input:
* x0: round key array, CTX
* x1: ctr0 (big endian, 128 bit)
* x2: mac
*/
SM4_PREPARE(x0)
ld1 {RMAC.16b}, [x2]
ld1 {v0.16b}, [x1]
SM4_CRYPT_BLK2(RMAC, v0)
/* en-/decrypt the mac with ctr0 */
eor RMAC.16b, RMAC.16b, v0.16b
st1 {RMAC.16b}, [x2]
ret
SYM_FUNC_END(sm4_ce_ccm_final)
.align 3
SYM_FUNC_START(sm4_ce_ccm_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nbytes
* x5: mac
*/
SM4_PREPARE(x0)
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
ld1 {RMAC.16b}, [x5]
.Lccm_enc_loop_4x:
cmp w4, #(4 * 16)
blt .Lccm_enc_loop_1x
sub w4, w4, #(4 * 16)
/* construct CTRs */
inc_le128(v8) /* +0 */
inc_le128(v9) /* +1 */
inc_le128(v10) /* +2 */
inc_le128(v11) /* +3 */
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK2(v9, RMAC)
eor v9.16b, v9.16b, v1.16b
eor RMAC.16b, RMAC.16b, v1.16b
SM4_CRYPT_BLK2(v10, RMAC)
eor v10.16b, v10.16b, v2.16b
eor RMAC.16b, RMAC.16b, v2.16b
SM4_CRYPT_BLK2(v11, RMAC)
eor v11.16b, v11.16b, v3.16b
eor RMAC.16b, RMAC.16b, v3.16b
st1 {v8.16b-v11.16b}, [x1], #64
cbz w4, .Lccm_enc_end
b .Lccm_enc_loop_4x
.Lccm_enc_loop_1x:
cmp w4, #16
blt .Lccm_enc_tail
sub w4, w4, #16
/* construct CTRs */
inc_le128(v8)
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v0.16b
st1 {v8.16b}, [x1], #16
cbz w4, .Lccm_enc_end
b .Lccm_enc_loop_1x
.Lccm_enc_tail:
/* construct CTRs */
inc_le128(v8)
SM4_CRYPT_BLK2(RMAC, v8)
/* store new MAC */
st1 {RMAC.16b}, [x5]
.Lccm_enc_tail_loop:
ldrb w0, [x2], #1 /* get 1 byte from input */
umov w9, v8.b[0] /* get top crypted CTR byte */
umov w6, RMAC.b[0] /* get top MAC byte */
eor w9, w9, w0 /* w9 = CTR ^ input */
eor w6, w6, w0 /* w6 = MAC ^ input */
strb w9, [x1], #1 /* store out byte */
strb w6, [x5], #1 /* store MAC byte */
subs w4, w4, #1
beq .Lccm_enc_ret
/* shift out one byte */
ext RMAC.16b, RMAC.16b, RMAC.16b, #1
ext v8.16b, v8.16b, v8.16b, #1
b .Lccm_enc_tail_loop
.Lccm_enc_end:
/* store new MAC */
st1 {RMAC.16b}, [x5]
/* store new CTR */
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
.Lccm_enc_ret:
ret
SYM_FUNC_END(sm4_ce_ccm_enc)
.align 3
SYM_FUNC_START(sm4_ce_ccm_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nbytes
* x5: mac
*/
SM4_PREPARE(x0)
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
ld1 {RMAC.16b}, [x5]
.Lccm_dec_loop_4x:
cmp w4, #(4 * 16)
blt .Lccm_dec_loop_1x
sub w4, w4, #(4 * 16)
/* construct CTRs */
inc_le128(v8) /* +0 */
inc_le128(v9) /* +1 */
inc_le128(v10) /* +2 */
inc_le128(v11) /* +3 */
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v8.16b
SM4_CRYPT_BLK2(v9, RMAC)
eor v9.16b, v9.16b, v1.16b
eor RMAC.16b, RMAC.16b, v9.16b
SM4_CRYPT_BLK2(v10, RMAC)
eor v10.16b, v10.16b, v2.16b
eor RMAC.16b, RMAC.16b, v10.16b
SM4_CRYPT_BLK2(v11, RMAC)
eor v11.16b, v11.16b, v3.16b
eor RMAC.16b, RMAC.16b, v11.16b
st1 {v8.16b-v11.16b}, [x1], #64
cbz w4, .Lccm_dec_end
b .Lccm_dec_loop_4x
.Lccm_dec_loop_1x:
cmp w4, #16
blt .Lccm_dec_tail
sub w4, w4, #16
/* construct CTRs */
inc_le128(v8)
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v8.16b
st1 {v8.16b}, [x1], #16
cbz w4, .Lccm_dec_end
b .Lccm_dec_loop_1x
.Lccm_dec_tail:
/* construct CTRs */
inc_le128(v8)
SM4_CRYPT_BLK2(RMAC, v8)
/* store new MAC */
st1 {RMAC.16b}, [x5]
.Lccm_dec_tail_loop:
ldrb w0, [x2], #1 /* get 1 byte from input */
umov w9, v8.b[0] /* get top crypted CTR byte */
umov w6, RMAC.b[0] /* get top MAC byte */
eor w9, w9, w0 /* w9 = CTR ^ input */
eor w6, w6, w9 /* w6 = MAC ^ output */
strb w9, [x1], #1 /* store out byte */
strb w6, [x5], #1 /* store MAC byte */
subs w4, w4, #1
beq .Lccm_dec_ret
/* shift out one byte */
ext RMAC.16b, RMAC.16b, RMAC.16b, #1
ext v8.16b, v8.16b, v8.16b, #1
b .Lccm_dec_tail_loop
.Lccm_dec_end:
/* store new MAC */
st1 {RMAC.16b}, [x5]
/* store new CTR */
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
.Lccm_dec_ret:
ret
SYM_FUNC_END(sm4_ce_ccm_dec)

View File

@ -0,0 +1,303 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
* as specified in rfc8998
* https://datatracker.ietf.org/doc/html/rfc8998
*
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-ce.h"
asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac,
const u8 *src, unsigned int nblocks);
asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nbytes, u8 *mac);
asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nbytes, u8 *mac);
asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac);
static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_ctx *ctx = crypto_aead_ctx(tfm);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
if ((authsize & 1) || authsize < 4)
return -EINVAL;
return 0;
}
static int ccm_format_input(u8 info[], struct aead_request *req,
unsigned int msglen)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int l = req->iv[0] + 1;
unsigned int m;
__be32 len;
/* verify that CCM dimension 'L': 2 <= L <= 8 */
if (l < 2 || l > 8)
return -EINVAL;
if (l < 4 && msglen >> (8 * l))
return -EOVERFLOW;
memset(&req->iv[SM4_BLOCK_SIZE - l], 0, l);
memcpy(info, req->iv, SM4_BLOCK_SIZE);
m = crypto_aead_authsize(aead);
/* format flags field per RFC 3610/NIST 800-38C */
*info |= ((m - 2) / 2) << 3;
if (req->assoclen)
*info |= (1 << 6);
/*
* format message length field,
* Linux uses a u32 type to represent msglen
*/
if (l >= 4)
l = 4;
len = cpu_to_be32(msglen);
memcpy(&info[SM4_BLOCK_SIZE - l], (u8 *)&len + 4 - l, l);
return 0;
}
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_ctx *ctx = crypto_aead_ctx(aead);
struct __packed { __be16 l; __be32 h; } aadlen;
u32 assoclen = req->assoclen;
struct scatter_walk walk;
unsigned int len;
if (assoclen < 0xff00) {
aadlen.l = cpu_to_be16(assoclen);
len = 2;
} else {
aadlen.l = cpu_to_be16(0xfffe);
put_unaligned_be32(assoclen, &aadlen.h);
len = 6;
}
sm4_ce_crypt_block(ctx->rkey_enc, mac, mac);
crypto_xor(mac, (const u8 *)&aadlen, len);
scatterwalk_start(&walk, req->src);
do {
u32 n = scatterwalk_clamp(&walk, assoclen);
u8 *p, *ptr;
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, assoclen);
}
p = ptr = scatterwalk_map(&walk);
assoclen -= n;
scatterwalk_advance(&walk, n);
while (n > 0) {
unsigned int l, nblocks;
if (len == SM4_BLOCK_SIZE) {
if (n < SM4_BLOCK_SIZE) {
sm4_ce_crypt_block(ctx->rkey_enc,
mac, mac);
len = 0;
} else {
nblocks = n / SM4_BLOCK_SIZE;
sm4_ce_cbcmac_update(ctx->rkey_enc,
mac, ptr, nblocks);
ptr += nblocks * SM4_BLOCK_SIZE;
n %= SM4_BLOCK_SIZE;
continue;
}
}
l = min(n, SM4_BLOCK_SIZE - len);
if (l) {
crypto_xor(mac + len, ptr, l);
len += l;
ptr += l;
n -= l;
}
}
scatterwalk_unmap(p);
scatterwalk_done(&walk, 0, assoclen);
} while (assoclen);
}
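
For reference, the length prefix built above follows the usual CCM encoding (worked example, not from the patch): an assoclen of 24 is emitted as the two bytes 00 18 directly in front of the associated data, while an assoclen of 70000 (>= 0xff00) is emitted as ff fe followed by the four-byte big-endian value 00 01 11 70; the prefix and the data are then absorbed block by block into the CBC-MAC.
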
static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
u32 *rkey_enc, u8 mac[],
void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst,
const u8 *src, u8 *iv,
unsigned int nbytes, u8 *mac))
{
u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
int err;
/* preserve the initial ctr0 for the TAG */
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
crypto_inc(walk->iv, SM4_BLOCK_SIZE);
kernel_neon_begin();
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
do {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total)
tail = 0;
if (walk->nbytes - tail)
sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
walk->nbytes - tail, mac);
if (walk->nbytes == walk->total)
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
if (walk->nbytes) {
err = skcipher_walk_done(walk, tail);
if (err)
return err;
if (walk->nbytes)
kernel_neon_begin();
}
} while (walk->nbytes > 0);
return 0;
}
static int ccm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) mac[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = ccm_format_input(mac, req, req->cryptlen);
if (err)
return err;
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (err)
return err;
err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_enc);
if (err)
return err;
/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
}
static int ccm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);
struct sm4_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) mac[SM4_BLOCK_SIZE];
u8 authtag[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = ccm_format_input(mac, req, req->cryptlen - authsize);
if (err)
return err;
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (err)
return err;
err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_dec);
if (err)
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(authtag, req->src,
req->assoclen + req->cryptlen - authsize,
authsize, 0);
if (crypto_memneq(authtag, mac, authsize))
return -EBADMSG;
return 0;
}
static struct aead_alg sm4_ccm_alg = {
.base = {
.cra_name = "ccm(sm4)",
.cra_driver_name = "ccm-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.ivsize = SM4_BLOCK_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.maxauthsize = SM4_BLOCK_SIZE,
.setkey = ccm_setkey,
.setauthsize = ccm_setauthsize,
.encrypt = ccm_encrypt,
.decrypt = ccm_decrypt,
};
static int __init sm4_ce_ccm_init(void)
{
return crypto_register_aead(&sm4_ccm_alg);
}
static void __exit sm4_ce_ccm_exit(void)
{
crypto_unregister_aead(&sm4_ccm_alg);
}
module_cpu_feature_match(SM4, sm4_ce_ccm_init);
module_exit(sm4_ce_ccm_exit);
MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("ccm(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");


@ -2,11 +2,11 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/algapi.h>
#include <crypto/sm4.h>
#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/types.h>
MODULE_ALIAS_CRYPTO("sm4");

File diff suppressed because it is too large.


@ -0,0 +1,741 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
* as specified in rfc8998
* https://datatracker.ietf.org/doc/html/rfc8998
*
* Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"
.arch armv8-a+crypto
.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
.set .Lv\b\().4s, \b
.endr
.macro sm4e, vd, vn
.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
/* Register macros */
/* Used for both encryption and decryption */
#define RHASH v21
#define RRCONST v22
#define RZERO v23
/* Helper macros. */
/*
* input: m0, m1
* output: r0:r1 (low 128-bits in r0, high in r1)
*/
#define PMUL_128x128(r0, r1, m0, m1, T0, T1) \
ext T0.16b, m1.16b, m1.16b, #8; \
pmull r0.1q, m0.1d, m1.1d; \
pmull T1.1q, m0.1d, T0.1d; \
pmull2 T0.1q, m0.2d, T0.2d; \
pmull2 r1.1q, m0.2d, m1.2d; \
eor T0.16b, T0.16b, T1.16b; \
ext T1.16b, RZERO.16b, T0.16b, #8; \
ext T0.16b, T0.16b, RZERO.16b, #8; \
eor r0.16b, r0.16b, T1.16b; \
eor r1.16b, r1.16b, T0.16b;
#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1, \
r2, r3, m2, m3, T2, T3, \
r4, r5, m4, m5, T4, T5, \
r6, r7, m6, m7, T6, T7) \
ext T0.16b, m1.16b, m1.16b, #8; \
ext T2.16b, m3.16b, m3.16b, #8; \
ext T4.16b, m5.16b, m5.16b, #8; \
ext T6.16b, m7.16b, m7.16b, #8; \
pmull r0.1q, m0.1d, m1.1d; \
pmull r2.1q, m2.1d, m3.1d; \
pmull r4.1q, m4.1d, m5.1d; \
pmull r6.1q, m6.1d, m7.1d; \
pmull T1.1q, m0.1d, T0.1d; \
pmull T3.1q, m2.1d, T2.1d; \
pmull T5.1q, m4.1d, T4.1d; \
pmull T7.1q, m6.1d, T6.1d; \
pmull2 T0.1q, m0.2d, T0.2d; \
pmull2 T2.1q, m2.2d, T2.2d; \
pmull2 T4.1q, m4.2d, T4.2d; \
pmull2 T6.1q, m6.2d, T6.2d; \
pmull2 r1.1q, m0.2d, m1.2d; \
pmull2 r3.1q, m2.2d, m3.2d; \
pmull2 r5.1q, m4.2d, m5.2d; \
pmull2 r7.1q, m6.2d, m7.2d; \
eor T0.16b, T0.16b, T1.16b; \
eor T2.16b, T2.16b, T3.16b; \
eor T4.16b, T4.16b, T5.16b; \
eor T6.16b, T6.16b, T7.16b; \
ext T1.16b, RZERO.16b, T0.16b, #8; \
ext T3.16b, RZERO.16b, T2.16b, #8; \
ext T5.16b, RZERO.16b, T4.16b, #8; \
ext T7.16b, RZERO.16b, T6.16b, #8; \
ext T0.16b, T0.16b, RZERO.16b, #8; \
ext T2.16b, T2.16b, RZERO.16b, #8; \
ext T4.16b, T4.16b, RZERO.16b, #8; \
ext T6.16b, T6.16b, RZERO.16b, #8; \
eor r0.16b, r0.16b, T1.16b; \
eor r2.16b, r2.16b, T3.16b; \
eor r4.16b, r4.16b, T5.16b; \
eor r6.16b, r6.16b, T7.16b; \
eor r1.16b, r1.16b, T0.16b; \
eor r3.16b, r3.16b, T2.16b; \
eor r5.16b, r5.16b, T4.16b; \
eor r7.16b, r7.16b, T6.16b;
/*
* input: r0:r1 (low 128-bits in r0, high in r1)
* output: a
*/
#define REDUCTION(a, r0, r1, rconst, T0, T1) \
pmull2 T0.1q, r1.2d, rconst.2d; \
ext T1.16b, T0.16b, RZERO.16b, #8; \
ext T0.16b, RZERO.16b, T0.16b, #8; \
eor r1.16b, r1.16b, T1.16b; \
eor r0.16b, r0.16b, T0.16b; \
pmull T0.1q, r1.1d, rconst.1d; \
eor a.16b, r0.16b, T0.16b;
#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1) \
rev32 b0.16b, b0.16b; \
ext T0.16b, m1.16b, m1.16b, #8; \
sm4e b0.4s, v24.4s; \
pmull r0.1q, m0.1d, m1.1d; \
sm4e b0.4s, v25.4s; \
pmull T1.1q, m0.1d, T0.1d; \
sm4e b0.4s, v26.4s; \
pmull2 T0.1q, m0.2d, T0.2d; \
sm4e b0.4s, v27.4s; \
pmull2 r1.1q, m0.2d, m1.2d; \
sm4e b0.4s, v28.4s; \
eor T0.16b, T0.16b, T1.16b; \
sm4e b0.4s, v29.4s; \
ext T1.16b, RZERO.16b, T0.16b, #8; \
sm4e b0.4s, v30.4s; \
ext T0.16b, T0.16b, RZERO.16b, #8; \
sm4e b0.4s, v31.4s; \
eor r0.16b, r0.16b, T1.16b; \
rev64 b0.4s, b0.4s; \
eor r1.16b, r1.16b, T0.16b; \
ext b0.16b, b0.16b, b0.16b, #8; \
rev32 b0.16b, b0.16b;
#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2, \
r0, r1, m0, m1, T0, T1, \
r2, r3, m2, m3, T2, T3, \
r4, r5, m4, m5, T4, T5) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
ext T0.16b, m1.16b, m1.16b, #8; \
ext T2.16b, m3.16b, m3.16b, #8; \
ext T4.16b, m5.16b, m5.16b, #8; \
sm4e b0.4s, v24.4s; \
sm4e b1.4s, v24.4s; \
sm4e b2.4s, v24.4s; \
pmull r0.1q, m0.1d, m1.1d; \
pmull r2.1q, m2.1d, m3.1d; \
pmull r4.1q, m4.1d, m5.1d; \
sm4e b0.4s, v25.4s; \
sm4e b1.4s, v25.4s; \
sm4e b2.4s, v25.4s; \
pmull T1.1q, m0.1d, T0.1d; \
pmull T3.1q, m2.1d, T2.1d; \
pmull T5.1q, m4.1d, T4.1d; \
sm4e b0.4s, v26.4s; \
sm4e b1.4s, v26.4s; \
sm4e b2.4s, v26.4s; \
pmull2 T0.1q, m0.2d, T0.2d; \
pmull2 T2.1q, m2.2d, T2.2d; \
pmull2 T4.1q, m4.2d, T4.2d; \
sm4e b0.4s, v27.4s; \
sm4e b1.4s, v27.4s; \
sm4e b2.4s, v27.4s; \
pmull2 r1.1q, m0.2d, m1.2d; \
pmull2 r3.1q, m2.2d, m3.2d; \
pmull2 r5.1q, m4.2d, m5.2d; \
sm4e b0.4s, v28.4s; \
sm4e b1.4s, v28.4s; \
sm4e b2.4s, v28.4s; \
eor T0.16b, T0.16b, T1.16b; \
eor T2.16b, T2.16b, T3.16b; \
eor T4.16b, T4.16b, T5.16b; \
sm4e b0.4s, v29.4s; \
sm4e b1.4s, v29.4s; \
sm4e b2.4s, v29.4s; \
ext T1.16b, RZERO.16b, T0.16b, #8; \
ext T3.16b, RZERO.16b, T2.16b, #8; \
ext T5.16b, RZERO.16b, T4.16b, #8; \
sm4e b0.4s, v30.4s; \
sm4e b1.4s, v30.4s; \
sm4e b2.4s, v30.4s; \
ext T0.16b, T0.16b, RZERO.16b, #8; \
ext T2.16b, T2.16b, RZERO.16b, #8; \
ext T4.16b, T4.16b, RZERO.16b, #8; \
sm4e b0.4s, v31.4s; \
sm4e b1.4s, v31.4s; \
sm4e b2.4s, v31.4s; \
eor r0.16b, r0.16b, T1.16b; \
eor r2.16b, r2.16b, T3.16b; \
eor r4.16b, r4.16b, T5.16b; \
rev64 b0.4s, b0.4s; \
rev64 b1.4s, b1.4s; \
rev64 b2.4s, b2.4s; \
eor r1.16b, r1.16b, T0.16b; \
eor r3.16b, r3.16b, T2.16b; \
eor r5.16b, r5.16b, T4.16b; \
ext b0.16b, b0.16b, b0.16b, #8; \
ext b1.16b, b1.16b, b1.16b, #8; \
ext b2.16b, b2.16b, b2.16b, #8; \
eor r0.16b, r0.16b, r2.16b; \
eor r1.16b, r1.16b, r3.16b; \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
eor r0.16b, r0.16b, r4.16b; \
eor r1.16b, r1.16b, r5.16b;
#define inc32_le128(vctr) \
mov vctr.d[1], x9; \
add w6, w9, #1; \
mov vctr.d[0], x8; \
bfi x9, x6, #0, #32; \
rev64 vctr.16b, vctr.16b;
#define GTAG_HASH_LENGTHS(vctr0, vlen) \
ld1 {vlen.16b}, [x7]; \
/* construct CTR0 */ \
/* the lower 32 bits of the initial IV are always be32(1) */ \
mov x6, #0x1; \
bfi x9, x6, #0, #32; \
mov vctr0.d[0], x8; \
mov vctr0.d[1], x9; \
rbit vlen.16b, vlen.16b; \
rev64 vctr0.16b, vctr0.16b; \
/* authtag = GCTR(CTR0, GHASH) */ \
eor RHASH.16b, RHASH.16b, vlen.16b; \
SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1, \
RTMP0, RTMP1); \
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3); \
rbit RHASH.16b, RHASH.16b; \
eor RHASH.16b, RHASH.16b, vctr0.16b;
/* Register macros for encrypt and ghash */
/* can be the same as input v0-v3 */
#define RR1 v0
#define RR3 v1
#define RR5 v2
#define RR7 v3
#define RR0 v4
#define RR2 v5
#define RR4 v6
#define RR6 v7
#define RTMP0 v8
#define RTMP1 v9
#define RTMP2 v10
#define RTMP3 v11
#define RTMP4 v12
#define RTMP5 v13
#define RTMP6 v14
#define RTMP7 v15
#define RH1 v16
#define RH2 v17
#define RH3 v18
#define RH4 v19
.align 3
SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
/* input:
* x0: round key array, CTX
* x1: ghash table
*/
SM4_PREPARE(x0)
adr_l x2, .Lghash_rconst
ld1r {RRCONST.2d}, [x2]
eor RZERO.16b, RZERO.16b, RZERO.16b
/* H = E(K, 0^128) */
rev32 v0.16b, RZERO.16b
SM4_CRYPT_BLK_BE(v0)
/* H ^ 1 */
rbit RH1.16b, v0.16b
/* H ^ 2 */
PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1)
REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3)
/* H ^ 3 */
PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1)
REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3)
/* H ^ 4 */
PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1)
REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3)
st1 {RH1.16b-RH4.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_pmull_ghash_setup)
.align 3
SYM_FUNC_START(pmull_ghash_update)
/* input:
* x0: ghash table
* x1: ghash result
* x2: src
* w3: nblocks
*/
ld1 {RH1.16b-RH4.16b}, [x0]
ld1 {RHASH.16b}, [x1]
rbit RHASH.16b, RHASH.16b
adr_l x4, .Lghash_rconst
ld1r {RRCONST.2d}, [x4]
eor RZERO.16b, RZERO.16b, RZERO.16b
.Lghash_loop_4x:
cmp w3, #4
blt .Lghash_loop_1x
sub w3, w3, #4
ld1 {v0.16b-v3.16b}, [x2], #64
rbit v0.16b, v0.16b
rbit v1.16b, v1.16b
rbit v2.16b, v2.16b
rbit v3.16b, v3.16b
/*
* (in0 ^ HASH) * H^4 => rr0:rr1
* (in1) * H^3 => rr2:rr3
* (in2) * H^2 => rr4:rr5
* (in3) * H^1 => rr6:rr7
*/
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
RR2, RR3, v1, RH3, RTMP2, RTMP3,
RR4, RR5, v2, RH2, RTMP4, RTMP5,
RR6, RR7, v3, RH1, RTMP6, RTMP7)
eor RR0.16b, RR0.16b, RR2.16b
eor RR1.16b, RR1.16b, RR3.16b
eor RR0.16b, RR0.16b, RR4.16b
eor RR1.16b, RR1.16b, RR5.16b
eor RR0.16b, RR0.16b, RR6.16b
eor RR1.16b, RR1.16b, RR7.16b
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
cbz w3, .Lghash_end
b .Lghash_loop_4x
.Lghash_loop_1x:
sub w3, w3, #1
ld1 {v0.16b}, [x2], #16
rbit v0.16b, v0.16b
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
cbnz w3, .Lghash_loop_1x
.Lghash_end:
rbit RHASH.16b, RHASH.16b
st1 {RHASH.2d}, [x1]
ret
SYM_FUNC_END(pmull_ghash_update)
.align 3
SYM_FUNC_START(sm4_ce_pmull_gcm_enc)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nbytes
* x5: ghash result
* x6: ghash table
* x7: lengths (only for last block)
*/
SM4_PREPARE(x0)
ldp x8, x9, [x3]
rev x8, x8
rev x9, x9
ld1 {RH1.16b-RH4.16b}, [x6]
ld1 {RHASH.16b}, [x5]
rbit RHASH.16b, RHASH.16b
adr_l x6, .Lghash_rconst
ld1r {RRCONST.2d}, [x6]
eor RZERO.16b, RZERO.16b, RZERO.16b
cbz w4, .Lgcm_enc_hash_len
.Lgcm_enc_loop_4x:
cmp w4, #(4 * 16)
blt .Lgcm_enc_loop_1x
sub w4, w4, #(4 * 16)
/* construct CTRs */
inc32_le128(v0) /* +0 */
inc32_le128(v1) /* +1 */
inc32_le128(v2) /* +2 */
inc32_le128(v3) /* +3 */
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
SM4_CRYPT_BLK4(v0, v1, v2, v3)
eor v0.16b, v0.16b, RTMP0.16b
eor v1.16b, v1.16b, RTMP1.16b
eor v2.16b, v2.16b, RTMP2.16b
eor v3.16b, v3.16b, RTMP3.16b
st1 {v0.16b-v3.16b}, [x1], #64
/* ghash update */
rbit v0.16b, v0.16b
rbit v1.16b, v1.16b
rbit v2.16b, v2.16b
rbit v3.16b, v3.16b
/*
* (in0 ^ HASH) * H^4 => rr0:rr1
* (in1) * H^3 => rr2:rr3
* (in2) * H^2 => rr4:rr5
* (in3) * H^1 => rr6:rr7
*/
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
RR2, RR3, v1, RH3, RTMP2, RTMP3,
RR4, RR5, v2, RH2, RTMP4, RTMP5,
RR6, RR7, v3, RH1, RTMP6, RTMP7)
eor RR0.16b, RR0.16b, RR2.16b
eor RR1.16b, RR1.16b, RR3.16b
eor RR0.16b, RR0.16b, RR4.16b
eor RR1.16b, RR1.16b, RR5.16b
eor RR0.16b, RR0.16b, RR6.16b
eor RR1.16b, RR1.16b, RR7.16b
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
cbz w4, .Lgcm_enc_hash_len
b .Lgcm_enc_loop_4x
.Lgcm_enc_loop_1x:
cmp w4, #16
blt .Lgcm_enc_tail
sub w4, w4, #16
/* construct CTRs */
inc32_le128(v0)
ld1 {RTMP0.16b}, [x2], #16
SM4_CRYPT_BLK(v0)
eor v0.16b, v0.16b, RTMP0.16b
st1 {v0.16b}, [x1], #16
/* ghash update */
rbit v0.16b, v0.16b
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
cbz w4, .Lgcm_enc_hash_len
b .Lgcm_enc_loop_1x
.Lgcm_enc_tail:
/* construct CTRs */
inc32_le128(v0)
SM4_CRYPT_BLK(v0)
/* load permute table */
adr_l x0, .Lcts_permute_table
add x0, x0, #32
sub x0, x0, w4, uxtw
ld1 {v3.16b}, [x0]
.Lgcm_enc_tail_loop:
/* do encrypt */
ldrb w0, [x2], #1 /* get 1 byte from input */
umov w6, v0.b[0] /* get next keystream byte */
eor w6, w6, w0 /* w6 = CTR ^ input */
strb w6, [x1], #1 /* store out byte */
/* shift right out one byte */
ext v0.16b, v0.16b, v0.16b, #1
/* accumulate the ciphertext byte in the high lanes for the final GHASH */
ins v0.b[15], w6
subs w4, w4, #1
bne .Lgcm_enc_tail_loop
/* padding last block with zeros */
tbl v0.16b, {v0.16b}, v3.16b
/* ghash update */
rbit v0.16b, v0.16b
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
.Lgcm_enc_hash_len:
cbz x7, .Lgcm_enc_end
GTAG_HASH_LENGTHS(v1, v3)
b .Lgcm_enc_ret
.Lgcm_enc_end:
/* store new CTR */
rev x8, x8
rev x9, x9
stp x8, x9, [x3]
rbit RHASH.16b, RHASH.16b
.Lgcm_enc_ret:
/* store new MAC */
st1 {RHASH.2d}, [x5]
ret
SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
#undef RR1
#undef RR3
#undef RR5
#undef RR7
#undef RR0
#undef RR2
#undef RR4
#undef RR6
#undef RTMP0
#undef RTMP1
#undef RTMP2
#undef RTMP3
#undef RTMP4
#undef RTMP5
#undef RTMP6
#undef RTMP7
#undef RH1
#undef RH2
#undef RH3
#undef RH4
/* Register macros for decrypt */
/* v0-v2 for building CTRs, v3-v5 for saving inputs */
#define RR1 v6
#define RR3 v7
#define RR5 v8
#define RR0 v9
#define RR2 v10
#define RR4 v11
#define RTMP0 v12
#define RTMP1 v13
#define RTMP2 v14
#define RTMP3 v15
#define RTMP4 v16
#define RTMP5 v17
#define RH1 v18
#define RH2 v19
#define RH3 v20
.align 3
SYM_FUNC_START(sm4_ce_pmull_gcm_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nbytes
* x5: ghash result
* x6: ghash table
* x7: lengths (only for last block)
*/
SM4_PREPARE(x0)
ldp x8, x9, [x3]
rev x8, x8
rev x9, x9
ld1 {RH1.16b-RH3.16b}, [x6]
ld1 {RHASH.16b}, [x5]
rbit RHASH.16b, RHASH.16b
adr_l x6, .Lghash_rconst
ld1r {RRCONST.2d}, [x6]
eor RZERO.16b, RZERO.16b, RZERO.16b
cbz w4, .Lgcm_dec_hash_len
.Lgcm_dec_loop_3x:
cmp w4, #(3 * 16)
blt .Lgcm_dec_loop_1x
sub w4, w4, #(3 * 16)
ld1 {v3.16b-v5.16b}, [x2], #(3 * 16)
/* construct CTRs */
inc32_le128(v0) /* +0 */
rbit v6.16b, v3.16b
inc32_le128(v1) /* +1 */
rbit v7.16b, v4.16b
inc32_le128(v2) /* +2 */
rbit v8.16b, v5.16b
eor RHASH.16b, RHASH.16b, v6.16b
/* decrypt & ghash update */
SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2,
RR0, RR1, RHASH, RH3, RTMP0, RTMP1,
RR2, RR3, v7, RH2, RTMP2, RTMP3,
RR4, RR5, v8, RH1, RTMP4, RTMP5)
eor v0.16b, v0.16b, v3.16b
eor v1.16b, v1.16b, v4.16b
eor v2.16b, v2.16b, v5.16b
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
st1 {v0.16b-v2.16b}, [x1], #(3 * 16)
cbz w4, .Lgcm_dec_hash_len
b .Lgcm_dec_loop_3x
.Lgcm_dec_loop_1x:
cmp w4, #16
blt .Lgcm_dec_tail
sub w4, w4, #16
ld1 {v3.16b}, [x2], #16
/* construct CTRs */
inc32_le128(v0)
rbit v6.16b, v3.16b
eor RHASH.16b, RHASH.16b, v6.16b
SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
eor v0.16b, v0.16b, v3.16b
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
st1 {v0.16b}, [x1], #16
cbz w4, .Lgcm_dec_hash_len
b .Lgcm_dec_loop_1x
.Lgcm_dec_tail:
/* construct CTRs */
inc32_le128(v0)
SM4_CRYPT_BLK(v0)
/* load permute table */
adr_l x0, .Lcts_permute_table
add x0, x0, #32
sub x0, x0, w4, uxtw
ld1 {v3.16b}, [x0]
.Lgcm_dec_tail_loop:
/* do decrypt */
ldrb w0, [x2], #1 /* get 1 byte from input */
umov w6, v0.b[0] /* get next keystream byte */
eor w6, w6, w0 /* w6 = CTR ^ input */
strb w6, [x1], #1 /* store out byte */
/* shift right out one byte */
ext v0.16b, v0.16b, v0.16b, #1
/* accumulate the ciphertext byte in the high lanes for the final GHASH */
ins v0.b[15], w0
subs w4, w4, #1
bne .Lgcm_dec_tail_loop
/* padding last block with zeros */
tbl v0.16b, {v0.16b}, v3.16b
/* ghash update */
rbit v0.16b, v0.16b
eor RHASH.16b, RHASH.16b, v0.16b
PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
.Lgcm_dec_hash_len:
cbz x7, .Lgcm_dec_end
GTAG_HASH_LENGTHS(v1, v3)
b .Lgcm_dec_ret
.Lgcm_dec_end:
/* store new CTR */
rev x8, x8
rev x9, x9
stp x8, x9, [x3]
rbit RHASH.16b, RHASH.16b
.Lgcm_dec_ret:
/* store new MAC */
st1 {RHASH.2d}, [x5]
ret
SYM_FUNC_END(sm4_ce_pmull_gcm_dec)
.section ".rodata", "a"
.align 4
.Lcts_permute_table:
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.Lghash_rconst:
.quad 0x87
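
The 4-way loops above use the standard GHASH aggregation identity, and the constant 0x87 in .Lghash_rconst encodes x^7 + x^2 + x + 1, the low terms of the GHASH polynomial x^128 + x^7 + x^2 + x + 1, applied in the bit-reflected representation set up by the rbit instructions. The C sketch below is illustrative only: gf128_mul() is a hypothetical carry-less multiply-and-reduce helper, not a kernel API, and the struct exists only for the example.

#include <stdint.h>

typedef struct { uint64_t hi, lo; } be128_t;	/* illustrative 128-bit type */

/* Assumed helper: multiply in GF(2^128) and reduce by the GHASH polynomial. */
be128_t gf128_mul(be128_t a, be128_t b);

static be128_t gf128_xor(be128_t a, be128_t b)
{
	be128_t r = { a.hi ^ b.hi, a.lo ^ b.lo };
	return r;
}

/*
 * Four blocks per iteration: instead of four dependent multiplications by H,
 * compute (Y ^ C0)*H^4 ^ C1*H^3 ^ C2*H^2 ^ C3*H, exactly as annotated in the
 * assembly above. h_pow[i] holds H^(i+1), matching the RH1-RH4 registers.
 */
static be128_t ghash_update_4x(be128_t y, const be128_t c[4],
			       const be128_t h_pow[4])
{
	be128_t acc = gf128_mul(gf128_xor(y, c[0]), h_pow[3]);

	acc = gf128_xor(acc, gf128_mul(c[1], h_pow[2]));
	acc = gf128_xor(acc, gf128_mul(c[2], h_pow[1]));
	acc = gf128_xor(acc, gf128_mul(c[3], h_pow[0]));
	return acc;
}
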


@ -0,0 +1,286 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
* as specified in rfc8998
* https://datatracker.ietf.org/doc/html/rfc8998
*
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-ce.h"
asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table);
asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash,
const u8 *src, unsigned int nblocks);
asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths);
asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst,
const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths);
#define GHASH_BLOCK_SIZE 16
#define GCM_IV_SIZE 12
struct sm4_gcm_ctx {
struct sm4_ctx key;
u8 ghash_table[16 * 4];
};
static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
kernel_neon_end();
return 0;
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
switch (authsize) {
case 4:
case 8:
case 12 ... 16:
return 0;
default:
return -EINVAL;
}
}
static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) buffer[GHASH_BLOCK_SIZE];
u32 assoclen = req->assoclen;
struct scatter_walk walk;
unsigned int buflen = 0;
scatterwalk_start(&walk, req->src);
do {
u32 n = scatterwalk_clamp(&walk, assoclen);
u8 *p, *ptr;
if (!n) {
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, assoclen);
}
p = ptr = scatterwalk_map(&walk);
assoclen -= n;
scatterwalk_advance(&walk, n);
if (n + buflen < GHASH_BLOCK_SIZE) {
memcpy(&buffer[buflen], ptr, n);
buflen += n;
} else {
unsigned int nblocks;
if (buflen) {
unsigned int l = GHASH_BLOCK_SIZE - buflen;
memcpy(&buffer[buflen], ptr, l);
ptr += l;
n -= l;
pmull_ghash_update(ctx->ghash_table, ghash,
buffer, 1);
}
nblocks = n / GHASH_BLOCK_SIZE;
if (nblocks) {
pmull_ghash_update(ctx->ghash_table, ghash,
ptr, nblocks);
ptr += nblocks * GHASH_BLOCK_SIZE;
}
buflen = n % GHASH_BLOCK_SIZE;
if (buflen)
memcpy(&buffer[0], ptr, buflen);
}
scatterwalk_unmap(p);
scatterwalk_done(&walk, 0, assoclen);
} while (assoclen);
/* padding with '0' */
if (buflen) {
memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen);
pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1);
}
}
static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
struct sm4_gcm_ctx *ctx, u8 ghash[],
void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
u8 *dst, const u8 *src, u8 *iv,
unsigned int nbytes, u8 *ghash,
const u8 *ghash_table, const u8 *lengths))
{
u8 __aligned(8) iv[SM4_BLOCK_SIZE];
be128 __aligned(8) lengths;
int err;
memset(ghash, 0, SM4_BLOCK_SIZE);
lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(walk->total * 8);
memcpy(iv, walk->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_begin();
if (req->assoclen)
gcm_calculate_auth_mac(req, ghash);
do {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
if (walk->nbytes == walk->total) {
tail = 0;
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes, ghash,
ctx->ghash_table,
(const u8 *)&lengths);
} else if (walk->nbytes - tail) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes - tail, ghash,
ctx->ghash_table, NULL);
}
kernel_neon_end();
err = skcipher_walk_done(walk, tail);
if (err)
return err;
if (walk->nbytes)
kernel_neon_begin();
} while (walk->nbytes > 0);
return 0;
}
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_encrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
if (err)
return err;
/* copy authtag to end of dst */
scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
return 0;
}
static int gcm_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(aead);
struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
u8 authtag[SM4_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
err = skcipher_walk_aead_decrypt(&walk, req, false);
if (err)
return err;
err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
if (err)
return err;
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(authtag, req->src,
req->assoclen + req->cryptlen - authsize,
authsize, 0);
if (crypto_memneq(authtag, ghash, authsize))
return -EBADMSG;
return 0;
}
static struct aead_alg sm4_gcm_alg = {
.base = {
.cra_name = "gcm(sm4)",
.cra_driver_name = "gcm-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_gcm_ctx),
.cra_module = THIS_MODULE,
},
.ivsize = GCM_IV_SIZE,
.chunksize = SM4_BLOCK_SIZE,
.maxauthsize = SM4_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
.encrypt = gcm_encrypt,
.decrypt = gcm_decrypt,
};
static int __init sm4_ce_gcm_init(void)
{
if (!cpu_have_named_feature(PMULL))
return -ENODEV;
return crypto_register_aead(&sm4_gcm_alg);
}
static void __exit sm4_ce_gcm_exit(void)
{
crypto_unregister_aead(&sm4_gcm_alg);
}
static const struct cpu_feature __maybe_unused sm4_ce_gcm_cpu_feature[] = {
{ cpu_feature(PMULL) },
{}
};
MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature);
module_cpu_feature_match(SM4, sm4_ce_gcm_init);
module_exit(sm4_ce_gcm_exit);
MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("gcm(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");
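
A brief reading of gcm_crypt() above, offered as a worked example: the payload counter starts at 2 because the counter block ending in be32(1) is reserved for encrypting the final tag (see GTAG_HASH_LENGTHS in the assembly), and the length block passed on the last walk step simply holds the bit counts of the two inputs, so 20 bytes of associated data and 40 bytes of payload give the big-endian pair (160, 320).
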


@ -14,8 +14,12 @@
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/internal/hash.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <crypto/sm4.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
@ -26,15 +30,48 @@ asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
u8 *iv, unsigned int nblocks);
asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
u8 *iv, unsigned int nblocks);
asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nbytes);
asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nbytes);
asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src,
u8 *tweak, unsigned int nbytes,
const u32 *rkey2_enc);
asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
u8 *tweak, unsigned int nbytes,
const u32 *rkey2_enc);
asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest,
const u8 *src, unsigned int nblocks,
bool enc_before, bool enc_after);
EXPORT_SYMBOL(sm4_ce_expand_key);
EXPORT_SYMBOL(sm4_ce_crypt_block);
EXPORT_SYMBOL(sm4_ce_cbc_enc);
EXPORT_SYMBOL(sm4_ce_cfb_enc);
struct sm4_xts_ctx {
struct sm4_ctx key1;
struct sm4_ctx key2;
};
struct sm4_mac_tfm_ctx {
struct sm4_ctx key;
u8 __aligned(8) consts[];
};
struct sm4_mac_desc_ctx {
unsigned int len;
u8 digest[SM4_BLOCK_SIZE];
};
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
@ -44,8 +81,33 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
if (key_len != SM4_KEY_SIZE * 2)
return -EINVAL;
ret = xts_verify_key(tfm, key, key_len);
if (ret)
return ret;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key1.rkey_enc,
ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
@ -94,66 +156,128 @@ static int sm4_ecb_decrypt(struct skcipher_request *req)
return sm4_ecb_do_crypt(req, ctx->rkey_dec);
}
static int sm4_cbc_encrypt(struct skcipher_request *req)
static int sm4_cbc_crypt(struct skcipher_request *req,
struct sm4_ctx *ctx, bool encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
unsigned int nblocks;
kernel_neon_begin();
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
if (encrypt)
sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
walk.iv, nblocks);
else
sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
walk.iv, nblocks);
kernel_neon_end();
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
}
static int sm4_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_cbc_crypt(req, ctx, true);
}
static int sm4_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
return sm4_cbc_crypt(req, ctx, false);
}
static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct scatterlist *src = req->src;
struct scatterlist *dst = req->dst;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct skcipher_walk walk;
unsigned int nbytes;
int cbc_blocks;
int err;
err = skcipher_walk_virt(&walk, req, false);
if (req->cryptlen < SM4_BLOCK_SIZE)
return -EINVAL;
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
if (req->cryptlen == SM4_BLOCK_SIZE)
return sm4_cbc_crypt(req, ctx, encrypt);
kernel_neon_begin();
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
NULL, NULL);
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
/* handle the plain CBC part */
cbc_blocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
if (cbc_blocks) {
skcipher_request_set_crypt(&subreq, src, dst,
cbc_blocks * SM4_BLOCK_SIZE,
req->iv);
kernel_neon_end();
err = sm4_cbc_crypt(&subreq, ctx, encrypt);
if (err)
return err;
err = skcipher_walk_done(&walk, nbytes);
dst = src = scatterwalk_ffwd(sg_src, src, subreq.cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst,
subreq.cryptlen);
}
return err;
/* handle ciphertext stealing */
skcipher_request_set_crypt(&subreq, src, dst,
req->cryptlen - cbc_blocks * SM4_BLOCK_SIZE,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
kernel_neon_begin();
if (encrypt)
sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes);
else
sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
static int sm4_cbc_cts_encrypt(struct skcipher_request *req)
{
return sm4_cbc_cts_crypt(req, true);
}
static int sm4_cbc_cts_decrypt(struct skcipher_request *req)
{
return sm4_cbc_cts_crypt(req, false);
}
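
A worked example of the split above: for req->cryptlen = 53, DIV_ROUND_UP(53, 16) - 2 = 2, so 32 bytes go through the regular CBC path and the remaining 21 bytes (one full block plus a 5-byte tail) are handed to sm4_ce_cbc_cts_enc()/sm4_ce_cbc_cts_dec(), which perform the ciphertext stealing.
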
static int sm4_cfb_encrypt(struct skcipher_request *req)
@ -283,6 +407,111 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
return err;
}
static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int tail = req->cryptlen % SM4_BLOCK_SIZE;
const u32 *rkey2_enc = ctx->key2.rkey_enc;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct scatterlist *src, *dst;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
if (req->cryptlen < SM4_BLOCK_SIZE)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int nblocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
skcipher_walk_abort(&walk);
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
nblocks * SM4_BLOCK_SIZE, req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
} else {
tail = 0;
}
while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) {
if (nbytes < walk.total)
nbytes &= ~(SM4_BLOCK_SIZE - 1);
kernel_neon_begin();
if (encrypt)
sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, nbytes,
rkey2_enc);
else
sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, nbytes,
rkey2_enc);
kernel_neon_end();
rkey2_enc = NULL;
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
if (err)
return err;
}
if (likely(tail == 0))
return 0;
/* handle ciphertext stealing */
dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen);
skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
kernel_neon_begin();
if (encrypt)
sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes,
rkey2_enc);
else
sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes,
rkey2_enc);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
static int sm4_xts_encrypt(struct skcipher_request *req)
{
return sm4_xts_crypt(req, true);
}
static int sm4_xts_decrypt(struct skcipher_request *req)
{
return sm4_xts_crypt(req, false);
}
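
The XTS path splits similarly, as a worked reading of the code above: when the partial tail cannot be reached within the same walk pass, req->cryptlen = 100 (tail = 4) means DIV_ROUND_UP(100, 16) - 2 = 5 full blocks (80 bytes) are processed in the main loop and the final 20 bytes, the last full block plus the 4-byte tail, go to the ciphertext-stealing call at the end; when everything fits in one pass, the whole length, tail included, is handed to the assembly in a single call.
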
static struct skcipher_alg sm4_algs[] = {
{
.base = {
@ -345,28 +574,312 @@ static struct skcipher_alg sm4_algs[] = {
.setkey = sm4_setkey,
.encrypt = sm4_ctr_crypt,
.decrypt = sm4_ctr_crypt,
}, {
.base = {
.cra_name = "cts(cbc(sm4))",
.cra_driver_name = "cts-cbc-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE,
.max_keysize = SM4_KEY_SIZE,
.ivsize = SM4_BLOCK_SIZE,
.walksize = SM4_BLOCK_SIZE * 2,
.setkey = sm4_setkey,
.encrypt = sm4_cbc_cts_encrypt,
.decrypt = sm4_cbc_cts_decrypt,
}, {
.base = {
.cra_name = "xts(sm4)",
.cra_driver_name = "xts-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_xts_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = SM4_KEY_SIZE * 2,
.max_keysize = SM4_KEY_SIZE * 2,
.ivsize = SM4_BLOCK_SIZE,
.walksize = SM4_BLOCK_SIZE * 2,
.setkey = sm4_xts_setkey,
.encrypt = sm4_xts_encrypt,
.decrypt = sm4_xts_decrypt,
}
};
static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
be128 *consts = (be128 *)ctx->consts;
u64 a, b;
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
memset(consts, 0, SM4_BLOCK_SIZE);
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
/* encrypt the zero block */
sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
kernel_neon_end();
/* multiply the zero-block ciphertext by u and u^2 in GF(2^128) */
a = be64_to_cpu(consts[0].a);
b = be64_to_cpu(consts[0].b);
consts[0].a = cpu_to_be64((a << 1) | (b >> 63));
consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
a = be64_to_cpu(consts[0].a);
b = be64_to_cpu(consts[0].b);
consts[1].a = cpu_to_be64((a << 1) | (b >> 63));
consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
return 0;
}
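
The subkey derivation above is the standard CMAC doubling in GF(2^128). A stand-alone byte-wise equivalent is sketched below for illustration; it is not part of the patch and the function name is invented.

#include <stdint.h>

/*
 * Double a 16-byte value in GF(2^128), MSB-first convention: shift left by
 * one bit and, if a bit fell off the top, XOR the constant 0x87 into the
 * low byte. Applied once to E(K, 0^128) this yields the first constant
 * above; applied twice, the second.
 */
static void gf128_double_sketch(uint8_t out[16], const uint8_t in[16])
{
	uint8_t carry = in[0] & 0x80;
	int i;

	for (i = 0; i < 15; i++)
		out[i] = (uint8_t)((in[i] << 1) | (in[i + 1] >> 7));
	out[15] = (uint8_t)(in[15] << 1);
	if (carry)
		out[15] ^= 0x87;
}
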
static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int key_len)
{
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
u8 __aligned(8) key2[SM4_BLOCK_SIZE];
static u8 const ks[3][SM4_BLOCK_SIZE] = {
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x1},
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x2},
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x3},
};
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
static int sm4_mac_init(struct shash_desc *desc)
{
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
memset(ctx->digest, 0, SM4_BLOCK_SIZE);
ctx->len = 0;
return 0;
}
static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
unsigned int len)
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int l, nblocks;
if (len == 0)
return 0;
if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
l = min(len, SM4_BLOCK_SIZE - ctx->len);
crypto_xor(ctx->digest + ctx->len, p, l);
ctx->len += l;
len -= l;
p += l;
}
if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
kernel_neon_begin();
if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
sm4_ce_crypt_block(tctx->key.rkey_enc,
ctx->digest, ctx->digest);
ctx->len = 0;
} else {
nblocks = len / SM4_BLOCK_SIZE;
len %= SM4_BLOCK_SIZE;
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
nblocks, (ctx->len == SM4_BLOCK_SIZE),
(len != 0));
p += nblocks * SM4_BLOCK_SIZE;
if (len == 0)
ctx->len = SM4_BLOCK_SIZE;
}
kernel_neon_end();
if (len) {
crypto_xor(ctx->digest, p, len);
ctx->len = len;
}
}
return 0;
}
static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
const u8 *consts = tctx->consts;
if (ctx->len != SM4_BLOCK_SIZE) {
ctx->digest[ctx->len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
kernel_neon_begin();
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
false, true);
kernel_neon_end();
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
{
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
if (ctx->len) {
kernel_neon_begin();
sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
ctx->digest);
kernel_neon_end();
}
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
static struct shash_alg sm4_mac_algs[] = {
{
.base = {
.cra_name = "cmac(sm4)",
.cra_driver_name = "cmac-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ SM4_BLOCK_SIZE * 2,
.cra_module = THIS_MODULE,
},
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
.final = sm4_cmac_final,
.setkey = sm4_cmac_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}, {
.base = {
.cra_name = "xcbc(sm4)",
.cra_driver_name = "xcbc-sm4-ce",
.cra_priority = 400,
.cra_blocksize = SM4_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ SM4_BLOCK_SIZE * 2,
.cra_module = THIS_MODULE,
},
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
.final = sm4_cmac_final,
.setkey = sm4_xcbc_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}, {
.base = {
.cra_name = "cbcmac(sm4)",
.cra_driver_name = "cbcmac-sm4-ce",
.cra_priority = 400,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx),
.cra_module = THIS_MODULE,
},
.digestsize = SM4_BLOCK_SIZE,
.init = sm4_mac_init,
.update = sm4_mac_update,
.final = sm4_cbcmac_final,
.setkey = sm4_cbcmac_setkey,
.descsize = sizeof(struct sm4_mac_desc_ctx),
}
};
static int __init sm4_init(void)
{
return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
int err;
err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
if (err)
return err;
err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
if (err)
goto out_err;
return 0;
out_err:
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
return err;
}
static void __exit sm4_exit(void)
{
crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
module_cpu_feature_match(SM4, sm4_init);
module_exit(sm4_exit);
MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR/XTS using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("ecb(sm4)");
MODULE_ALIAS_CRYPTO("cbc(sm4)");
MODULE_ALIAS_CRYPTO("cfb(sm4)");
MODULE_ALIAS_CRYPTO("ctr(sm4)");
MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
MODULE_ALIAS_CRYPTO("xts(sm4)");
MODULE_ALIAS_CRYPTO("cmac(sm4)");
MODULE_ALIAS_CRYPTO("xcbc(sm4)");
MODULE_ALIAS_CRYPTO("cbcmac(sm4)");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_LICENSE("GPL v2");


@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SM4 common functions for Crypto Extensions
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
const u32 *fk, const u32 *ck);
void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
void sm4_ce_cbc_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblocks);
void sm4_ce_cfb_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblocks);


@ -18,6 +18,11 @@
#define RTMP2 v10
#define RTMP3 v11
#define RTMP4 v12
#define RTMP5 v13
#define RTMP6 v14
#define RTMP7 v15
#define RX0 v12
#define RX1 v13
#define RKEY v14
@ -25,7 +30,7 @@
/* Helper macros. */
#define PREPARE \
#define SM4_PREPARE() \
adr_l x5, crypto_sm4_sbox; \
ld1 {v16.16b-v19.16b}, [x5], #64; \
ld1 {v20.16b-v23.16b}, [x5], #64; \
@ -42,7 +47,25 @@
zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
zip2 s3.2d, RTMP2.2d, RTMP3.2d;
#define rotate_clockwise_90(s0, s1, s2, s3) \
#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
zip1 RTMP0.4s, s0.4s, s1.4s; \
zip1 RTMP1.4s, s2.4s, s3.4s; \
zip2 RTMP2.4s, s0.4s, s1.4s; \
zip2 RTMP3.4s, s2.4s, s3.4s; \
zip1 RTMP4.4s, s4.4s, s5.4s; \
zip1 RTMP5.4s, s6.4s, s7.4s; \
zip2 RTMP6.4s, s4.4s, s5.4s; \
zip2 RTMP7.4s, s6.4s, s7.4s; \
zip1 s0.2d, RTMP0.2d, RTMP1.2d; \
zip2 s1.2d, RTMP0.2d, RTMP1.2d; \
zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
zip2 s3.2d, RTMP2.2d, RTMP3.2d; \
zip1 s4.2d, RTMP4.2d, RTMP5.2d; \
zip2 s5.2d, RTMP4.2d, RTMP5.2d; \
zip1 s6.2d, RTMP6.2d, RTMP7.2d; \
zip2 s7.2d, RTMP6.2d, RTMP7.2d;
#define rotate_clockwise_4x4(s0, s1, s2, s3) \
zip1 RTMP0.4s, s1.4s, s0.4s; \
zip2 RTMP1.4s, s1.4s, s0.4s; \
zip1 RTMP2.4s, s3.4s, s2.4s; \
@ -52,6 +75,24 @@
zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
zip2 s3.2d, RTMP3.2d, RTMP1.2d;
#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
zip1 RTMP0.4s, s1.4s, s0.4s; \
zip1 RTMP2.4s, s3.4s, s2.4s; \
zip2 RTMP1.4s, s1.4s, s0.4s; \
zip2 RTMP3.4s, s3.4s, s2.4s; \
zip1 RTMP4.4s, s5.4s, s4.4s; \
zip1 RTMP6.4s, s7.4s, s6.4s; \
zip2 RTMP5.4s, s5.4s, s4.4s; \
zip2 RTMP7.4s, s7.4s, s6.4s; \
zip1 s0.2d, RTMP2.2d, RTMP0.2d; \
zip2 s1.2d, RTMP2.2d, RTMP0.2d; \
zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
zip2 s3.2d, RTMP3.2d, RTMP1.2d; \
zip1 s4.2d, RTMP6.2d, RTMP4.2d; \
zip2 s5.2d, RTMP6.2d, RTMP4.2d; \
zip1 s6.2d, RTMP7.2d, RTMP5.2d; \
zip2 s7.2d, RTMP7.2d, RTMP5.2d;
#define ROUND4(round, s0, s1, s2, s3) \
dup RX0.4s, RKEY.s[round]; \
/* rk ^ s1 ^ s2 ^ s3 */ \
@ -87,14 +128,7 @@
/* s0 ^= RTMP3 */ \
eor s0.16b, s0.16b, RTMP3.16b;
#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
\
transpose_4x4(b0, b1, b2, b3); \
\
#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \
mov x6, 8; \
4: \
ld1 {RKEY.4s}, [x0], #16; \
@ -107,15 +141,23 @@
\
bne 4b; \
\
rotate_clockwise_90(b0, b1, b2, b3); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
\
rotate_clockwise_4x4(b0, b1, b2, b3); \
\
/* repoint to rkey */ \
sub x0, x0, #128;
#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \
/* rk ^ s1 ^ s2 ^ s3 */ \
dup RX0.4s, RKEY.s[round]; \
@ -175,7 +217,7 @@
eor s0.16b, s0.16b, RTMP0.16b; \
eor t0.16b, t0.16b, RTMP1.16b;
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
@ -185,9 +227,6 @@
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
\
transpose_4x4(b0, b1, b2, b3); \
transpose_4x4(b4, b5, b6, b7); \
\
mov x6, 8; \
8: \
ld1 {RKEY.4s}, [x0], #16; \
@ -200,8 +239,6 @@
\
bne 8b; \
\
rotate_clockwise_90(b0, b1, b2, b3); \
rotate_clockwise_90(b4, b5, b6, b7); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
@ -214,274 +251,429 @@
/* repoint to rkey */ \
sub x0, x0, #128;
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7); \
rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7); \
.align 3
SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4)
SYM_FUNC_START(sm4_neon_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: num blocks (1..4)
* w3: nblocks
*/
PREPARE;
SM4_PREPARE()
ld1 {v0.16b}, [x2], #16;
mov v1.16b, v0.16b;
mov v2.16b, v0.16b;
mov v3.16b, v0.16b;
cmp w3, #2;
blt .Lblk4_load_input_done;
ld1 {v1.16b}, [x2], #16;
beq .Lblk4_load_input_done;
ld1 {v2.16b}, [x2], #16;
cmp w3, #3;
beq .Lblk4_load_input_done;
ld1 {v3.16b}, [x2];
.Lcrypt_loop_8x:
sub w3, w3, #8
tbnz w3, #31, .Lcrypt_4x
.Lblk4_load_input_done:
SM4_CRYPT_BLK4(v0, v1, v2, v3);
ld4 {v0.4s-v3.4s}, [x2], #64
ld4 {v4.4s-v7.4s}, [x2], #64
st1 {v0.16b}, [x1], #16;
cmp w3, #2;
blt .Lblk4_store_output_done;
st1 {v1.16b}, [x1], #16;
beq .Lblk4_store_output_done;
st1 {v2.16b}, [x1], #16;
cmp w3, #3;
beq .Lblk4_store_output_done;
st1 {v3.16b}, [x1];
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
.Lblk4_store_output_done:
ret;
SYM_FUNC_END(__sm4_neon_crypt_blk1_4)
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
.align 3
SYM_FUNC_START(sm4_neon_crypt_blk1_8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: num blocks (1..8)
*/
cmp w3, #5;
blt __sm4_neon_crypt_blk1_4;
cbz w3, .Lcrypt_end
b .Lcrypt_loop_8x
PREPARE;
.Lcrypt_4x:
add w3, w3, #8
cmp w3, #4
blt .Lcrypt_tail
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b}, [x2], #16;
mov v5.16b, v4.16b;
mov v6.16b, v4.16b;
mov v7.16b, v4.16b;
beq .Lblk8_load_input_done;
ld1 {v5.16b}, [x2], #16;
cmp w3, #7;
blt .Lblk8_load_input_done;
ld1 {v6.16b}, [x2], #16;
beq .Lblk8_load_input_done;
ld1 {v7.16b}, [x2];
sub w3, w3, #4
.Lblk8_load_input_done:
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
ld4 {v0.4s-v3.4s}, [x2], #64
cmp w3, #6;
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b}, [x1], #16;
blt .Lblk8_store_output_done;
st1 {v5.16b}, [x1], #16;
beq .Lblk8_store_output_done;
st1 {v6.16b}, [x1], #16;
cmp w3, #7;
beq .Lblk8_store_output_done;
st1 {v7.16b}, [x1];
SM4_CRYPT_BLK4(v0, v1, v2, v3)
.Lblk8_store_output_done:
ret;
SYM_FUNC_END(sm4_neon_crypt_blk1_8)
st1 {v0.16b-v3.16b}, [x1], #64
.align 3
SYM_FUNC_START(sm4_neon_crypt_blk8)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* w3: nblocks (multiples of 8)
*/
PREPARE;
cbz w3, .Lcrypt_end
.Lcrypt_loop_blk:
subs w3, w3, #8;
bmi .Lcrypt_end;
.Lcrypt_tail:
cmp w3, #2
ld1 {v0.16b}, [x2], #16
blt .Lcrypt_tail_load_done
ld1 {v1.16b}, [x2], #16
beq .Lcrypt_tail_load_done
ld1 {v2.16b}, [x2], #16
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2], #64;
.Lcrypt_tail_load_done:
transpose_4x4(v0, v1, v2, v3)
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
SM4_CRYPT_BLK4(v0, v1, v2, v3)
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1], #64;
b .Lcrypt_loop_blk;
cmp w3, #2
st1 {v0.16b}, [x1], #16
blt .Lcrypt_end
st1 {v1.16b}, [x1], #16
beq .Lcrypt_end
st1 {v2.16b}, [x1], #16
.Lcrypt_end:
ret;
SYM_FUNC_END(sm4_neon_crypt_blk8)
ret
SYM_FUNC_END(sm4_neon_crypt)
.align 3
SYM_FUNC_START(sm4_neon_cbc_dec_blk8)
SYM_FUNC_START(sm4_neon_cbc_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks (multiples of 8)
* w4: nblocks
*/
PREPARE;
SM4_PREPARE()
ld1 {RIV.16b}, [x3];
ld1 {RIV.16b}, [x3]
.Lcbc_loop_blk:
subs w4, w4, #8;
bmi .Lcbc_end;
.Lcbc_dec_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lcbc_dec_4x
ld1 {v0.16b-v3.16b}, [x2], #64;
ld1 {v4.16b-v7.16b}, [x2];
ld4 {v0.4s-v3.4s}, [x2], #64
ld4 {v4.4s-v7.4s}, [x2]
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7)
sub x2, x2, #64;
eor v0.16b, v0.16b, RIV.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v1.16b, v1.16b, RTMP0.16b;
eor v2.16b, v2.16b, RTMP1.16b;
eor v3.16b, v3.16b, RTMP2.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
/* Avoid overwriting the RIV register */
rotate_clockwise_4x4(v0, v1, v2, v3)
rotate_clockwise_4x4(v4, v5, v6, v7)
eor v4.16b, v4.16b, RTMP3.16b;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v5.16b, v5.16b, RTMP0.16b;
eor v6.16b, v6.16b, RTMP1.16b;
eor v7.16b, v7.16b, RTMP2.16b;
sub x2, x2, #64
mov RIV.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
eor v0.16b, v0.16b, RIV.16b
b .Lcbc_loop_blk;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
.Lcbc_end:
eor v1.16b, v1.16b, RTMP0.16b
eor v2.16b, v2.16b, RTMP1.16b
eor v3.16b, v3.16b, RTMP2.16b
eor v4.16b, v4.16b, RTMP3.16b
eor v5.16b, v5.16b, RTMP4.16b
eor v6.16b, v6.16b, RTMP5.16b
eor v7.16b, v7.16b, RTMP6.16b
mov RIV.16b, RTMP7.16b
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
cbz w4, .Lcbc_dec_end
b .Lcbc_dec_loop_8x
.Lcbc_dec_4x:
add w4, w4, #8
cmp w4, #4
blt .Lcbc_dec_tail
sub w4, w4, #4
ld1 {v0.16b-v3.16b}, [x2], #64
rev32 v4.16b, v0.16b
rev32 v5.16b, v1.16b
rev32 v6.16b, v2.16b
rev32 v7.16b, v3.16b
transpose_4x4(v4, v5, v6, v7)
SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
eor v4.16b, v4.16b, RIV.16b
eor v5.16b, v5.16b, v0.16b
eor v6.16b, v6.16b, v1.16b
eor v7.16b, v7.16b, v2.16b
mov RIV.16b, v3.16b
st1 {v4.16b-v7.16b}, [x1], #64
cbz w4, .Lcbc_dec_end
.Lcbc_dec_tail:
cmp w4, #2
ld1 {v0.16b}, [x2], #16
blt .Lcbc_dec_tail_load_done
ld1 {v1.16b}, [x2], #16
beq .Lcbc_dec_tail_load_done
ld1 {v2.16b}, [x2], #16
.Lcbc_dec_tail_load_done:
rev32 v4.16b, v0.16b
rev32 v5.16b, v1.16b
rev32 v6.16b, v2.16b
transpose_4x4(v4, v5, v6, v7)
SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
cmp w4, #2
eor v4.16b, v4.16b, RIV.16b
mov RIV.16b, v0.16b
st1 {v4.16b}, [x1], #16
blt .Lcbc_dec_end
eor v5.16b, v5.16b, v0.16b
mov RIV.16b, v1.16b
st1 {v5.16b}, [x1], #16
beq .Lcbc_dec_end
eor v6.16b, v6.16b, v1.16b
mov RIV.16b, v2.16b
st1 {v6.16b}, [x1], #16
.Lcbc_dec_end:
/* store new IV */
st1 {RIV.16b}, [x3];
st1 {RIV.16b}, [x3]
ret;
SYM_FUNC_END(sm4_neon_cbc_dec_blk8)
ret
SYM_FUNC_END(sm4_neon_cbc_dec)
.align 3
SYM_FUNC_START(sm4_neon_cfb_dec_blk8)
SYM_FUNC_START(sm4_neon_cfb_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
* w4: nblocks (multiples of 8)
* w4: nblocks
*/
PREPARE;
SM4_PREPARE()
ld1 {v0.16b}, [x3];
ld1 {v0.16b}, [x3]
.Lcfb_loop_blk:
subs w4, w4, #8;
bmi .Lcfb_end;
.Lcfb_dec_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lcfb_dec_4x
ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
ld1 {v4.16b-v7.16b}, [x2];
ld1 {v1.16b-v3.16b}, [x2], #48
ld4 {v4.4s-v7.4s}, [x2]
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
transpose_4x4(v0, v1, v2, v3)
sub x2, x2, #48;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
sub x2, x2, #48
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
mov v0.16b, RTMP3.16b;
eor v0.16b, v0.16b, RTMP0.16b
eor v1.16b, v1.16b, RTMP1.16b
eor v2.16b, v2.16b, RTMP2.16b
eor v3.16b, v3.16b, RTMP3.16b
eor v4.16b, v4.16b, RTMP4.16b
eor v5.16b, v5.16b, RTMP5.16b
eor v6.16b, v6.16b, RTMP6.16b
eor v7.16b, v7.16b, RTMP7.16b
b .Lcfb_loop_blk;
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
.Lcfb_end:
mov v0.16b, RTMP7.16b
cbz w4, .Lcfb_dec_end
b .Lcfb_dec_loop_8x
.Lcfb_dec_4x:
add w4, w4, #8
cmp w4, #4
blt .Lcfb_dec_tail
sub w4, w4, #4
ld1 {v4.16b-v7.16b}, [x2], #64
rev32 v0.16b, v0.16b /* v0 is IV register */
rev32 v1.16b, v4.16b
rev32 v2.16b, v5.16b
rev32 v3.16b, v6.16b
transpose_4x4(v0, v1, v2, v3)
SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
eor v3.16b, v3.16b, v7.16b
st1 {v0.16b-v3.16b}, [x1], #64
mov v0.16b, v7.16b
cbz w4, .Lcfb_dec_end
.Lcfb_dec_tail:
cmp w4, #2
ld1 {v4.16b}, [x2], #16
blt .Lcfb_dec_tail_load_done
ld1 {v5.16b}, [x2], #16
beq .Lcfb_dec_tail_load_done
ld1 {v6.16b}, [x2], #16
.Lcfb_dec_tail_load_done:
rev32 v0.16b, v0.16b /* v0 is IV register */
rev32 v1.16b, v4.16b
rev32 v2.16b, v5.16b
transpose_4x4(v0, v1, v2, v3)
SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
cmp w4, #2
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x1], #16
mov v0.16b, v4.16b
blt .Lcfb_dec_end
eor v1.16b, v1.16b, v5.16b
st1 {v1.16b}, [x1], #16
mov v0.16b, v5.16b
beq .Lcfb_dec_end
eor v2.16b, v2.16b, v6.16b
st1 {v2.16b}, [x1], #16
mov v0.16b, v6.16b
.Lcfb_dec_end:
/* store new IV */
st1 {v0.16b}, [x3];
st1 {v0.16b}, [x3]
ret;
SYM_FUNC_END(sm4_neon_cfb_dec_blk8)
ret
SYM_FUNC_END(sm4_neon_cfb_dec)
.align 3
SYM_FUNC_START(sm4_neon_ctr_enc_blk8)
SYM_FUNC_START(sm4_neon_ctr_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
* w4: nblocks (multiples of 8)
* w4: nblocks
*/
PREPARE;
SM4_PREPARE()
ldp x7, x8, [x3];
rev x7, x7;
rev x8, x8;
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
.Lctr_loop_blk:
subs w4, w4, #8;
bmi .Lctr_end;
.Lctr_crypt_loop_8x:
sub w4, w4, #8
tbnz w4, #31, .Lctr_crypt_4x
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
adc x7, x7, xzr; \
rev64 vctr.16b, vctr.16b;
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
rev64 vctr.16b, vctr.16b; \
adc x7, x7, xzr;
/* construct CTRs */
inc_le128(v0); /* +0 */
inc_le128(v1); /* +1 */
inc_le128(v2); /* +2 */
inc_le128(v3); /* +3 */
inc_le128(v4); /* +4 */
inc_le128(v5); /* +5 */
inc_le128(v6); /* +6 */
inc_le128(v7); /* +7 */
inc_le128(v0) /* +0 */
inc_le128(v1) /* +1 */
inc_le128(v2) /* +2 */
inc_le128(v3) /* +3 */
inc_le128(v4) /* +4 */
inc_le128(v5) /* +5 */
inc_le128(v6) /* +6 */
inc_le128(v7) /* +7 */
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7)
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v0.16b, v0.16b, RTMP0.16b;
eor v1.16b, v1.16b, RTMP1.16b;
eor v2.16b, v2.16b, RTMP2.16b;
eor v3.16b, v3.16b, RTMP3.16b;
st1 {v0.16b-v3.16b}, [x1], #64;
SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
eor v4.16b, v4.16b, RTMP0.16b;
eor v5.16b, v5.16b, RTMP1.16b;
eor v6.16b, v6.16b, RTMP2.16b;
eor v7.16b, v7.16b, RTMP3.16b;
st1 {v4.16b-v7.16b}, [x1], #64;
ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
b .Lctr_loop_blk;
eor v0.16b, v0.16b, RTMP0.16b
eor v1.16b, v1.16b, RTMP1.16b
eor v2.16b, v2.16b, RTMP2.16b
eor v3.16b, v3.16b, RTMP3.16b
eor v4.16b, v4.16b, RTMP4.16b
eor v5.16b, v5.16b, RTMP5.16b
eor v6.16b, v6.16b, RTMP6.16b
eor v7.16b, v7.16b, RTMP7.16b
.Lctr_end:
st1 {v0.16b-v3.16b}, [x1], #64
st1 {v4.16b-v7.16b}, [x1], #64
cbz w4, .Lctr_crypt_end
b .Lctr_crypt_loop_8x
.Lctr_crypt_4x:
add w4, w4, #8
cmp w4, #4
blt .Lctr_crypt_tail
sub w4, w4, #4
/* construct CTRs */
inc_le128(v0) /* +0 */
inc_le128(v1) /* +1 */
inc_le128(v2) /* +2 */
inc_le128(v3) /* +3 */
ld1 {v4.16b-v7.16b}, [x2], #64
transpose_4x4(v0, v1, v2, v3)
SM4_CRYPT_BLK4(v0, v1, v2, v3)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
eor v3.16b, v3.16b, v7.16b
st1 {v0.16b-v3.16b}, [x1], #64
cbz w4, .Lctr_crypt_end
.Lctr_crypt_tail:
/* inc_le128 will change the sign bit */
ld1 {v4.16b}, [x2], #16
inc_le128(v0)
cmp w4, #2
blt .Lctr_crypt_tail_load_done
ld1 {v5.16b}, [x2], #16
inc_le128(v1)
cmp w4, #2
beq .Lctr_crypt_tail_load_done
ld1 {v6.16b}, [x2], #16
inc_le128(v2)
.Lctr_crypt_tail_load_done:
transpose_4x4(v0, v1, v2, v3)
SM4_CRYPT_BLK4(v0, v1, v2, v3)
cmp w4, #2
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x1], #16
blt .Lctr_crypt_end
eor v1.16b, v1.16b, v5.16b
st1 {v1.16b}, [x1], #16
beq .Lctr_crypt_end
eor v2.16b, v2.16b, v6.16b
st1 {v2.16b}, [x1], #16
.Lctr_crypt_end:
/* store new CTR */
rev x7, x7;
rev x8, x8;
stp x7, x8, [x3];
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
ret;
SYM_FUNC_END(sm4_neon_ctr_enc_blk8)
ret
SYM_FUNC_END(sm4_neon_ctr_crypt)
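
The inc_le128 helper above is the heart of the CTR path: the ldp/rev sequence first pulls the 128-bit big-endian counter from [x3] into x7 (high half) and x8 (low half) in host order, and each inc_le128 invocation then emits the current value as one big-endian counter block before advancing the pair. A rough C-level equivalent, as a sketch of my reading of the assembly only (inc_le128_equiv, out, hi and lo are illustrative names, not part of the patch):

#include <asm/unaligned.h>
#include <linux/types.h>

/*
 * Sketch: emit the current 128-bit counter as a big-endian block
 * (what the mov d[0]/d[1] + rev64 steps produce), then increment the
 * low half and carry into the high half (adds #1 / adc xzr).
 */
static inline void inc_le128_equiv(u8 out[16], u64 *hi, u64 *lo)
{
	put_unaligned_be64(*hi, out);		/* vctr.d[0] after rev64 */
	put_unaligned_be64(*lo, out + 8);	/* vctr.d[1] after rev64 */
	if (++*lo == 0)
		++*hi;
}

At the end of the function the rev/stp pair writes x7/x8 back to [x3] in the same big-endian layout, so the caller sees the advanced counter.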

View File

@ -18,19 +18,14 @@
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1))
asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblocks);
asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblocks);
asmlinkage void sm4_neon_cfb_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblocks);
asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey_enc, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblocks);
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
@ -51,27 +46,18 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
unsigned int nblocks;
kernel_neon_begin();
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_crypt_blk8(rkey, dst, src, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
sm4_neon_crypt(rkey, dst, src, nblocks);
kernel_neon_end();
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_neon_crypt_blk1_8(rkey, dst, src, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
@ -138,48 +124,19 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
unsigned int nblocks;
kernel_neon_begin();
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
walk.iv, nblocks);
kernel_neon_end();
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
u8 iv[SM4_BLOCK_SIZE];
int i;
sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream,
src, nblks);
src += ((int)nblks - 2) * SM4_BLOCK_SIZE;
dst += (nblks - 1) * SM4_BLOCK_SIZE;
memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
for (i = nblks - 1; i > 0; i--) {
crypto_xor_cpy(dst, src,
&keystream[i * SM4_BLOCK_SIZE],
SM4_BLOCK_SIZE);
src -= SM4_BLOCK_SIZE;
dst -= SM4_BLOCK_SIZE;
}
crypto_xor_cpy(dst, walk.iv,
keystream, SM4_BLOCK_SIZE);
memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
@ -238,42 +195,22 @@ static int sm4_cfb_decrypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
unsigned int nblocks;
kernel_neon_begin();
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
sm4_neon_cfb_dec(ctx->rkey_enc, dst, src,
walk.iv, nblocks);
kernel_neon_end();
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
if (nblks > 1)
memcpy(&keystream[SM4_BLOCK_SIZE], src,
(nblks - 1) * SM4_BLOCK_SIZE);
memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE,
SM4_BLOCK_SIZE);
sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
keystream, nblks);
crypto_xor_cpy(dst, src, keystream,
nblks * SM4_BLOCK_SIZE);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
@ -302,41 +239,22 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
unsigned int nblocks;
kernel_neon_begin();
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
nblks = BYTES2BLK8(nbytes);
if (nblks) {
sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src,
walk.iv, nblks);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
walk.iv, nblocks);
kernel_neon_end();
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;
}
nblks = BYTES2BLKS(nbytes);
if (nblks) {
u8 keystream[SM4_BLOCK_SIZE * 8];
int i;
for (i = 0; i < nblks; i++) {
memcpy(&keystream[i * SM4_BLOCK_SIZE],
walk.iv, SM4_BLOCK_SIZE);
crypto_inc(walk.iv, SM4_BLOCK_SIZE);
}
sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
keystream, nblks);
crypto_xor_cpy(dst, src, keystream,
nblks * SM4_BLOCK_SIZE);
dst += nblks * SM4_BLOCK_SIZE;
src += nblks * SM4_BLOCK_SIZE;
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
u8 keystream[SM4_BLOCK_SIZE];
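
Once the removed lines are taken out, every mode in this glue file converges on the same short walk loop; the ECB hunk above reduces to roughly the following (a sketch assembled from the diff, with the surrounding declarations and error paths as in the original):

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;
		unsigned int nblocks = nbytes / SM4_BLOCK_SIZE;

		if (nblocks) {
			kernel_neon_begin();
			sm4_neon_crypt(rkey, dst, src, nblocks);
			kernel_neon_end();
		}

		/* report any unprocessed remainder back to the walk */
		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
	}

The BYTES2BLK8/BYTES2BLKS split disappears because the new sm4_neon_* entry points take an arbitrary nblocks and do the 8-block/4-block/tail splitting themselves, as the .L*_8x/.L*_4x/.L*_tail labels in the assembly above show.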

View File

@ -82,7 +82,6 @@ static int __init rng_init (void)
sigio_broken(random_fd);
hwrng.name = RNG_MODULE_NAME;
hwrng.read = rng_dev_read;
hwrng.quality = 1024;
err = hwrng_register(&hwrng);
if (err) {

View File

@ -107,3 +107,6 @@ quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
$(call if_changed,perlasm)
# Disable GCOV in odd or sensitive code
GCOV_PROFILE_curve25519-x86_64.o := n

View File

@ -7,6 +7,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>
#define STATE0 %xmm0
@ -402,7 +403,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad)
* void crypto_aegis128_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
SYM_FUNC_START(crypto_aegis128_aesni_enc)
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
FRAME_BEGIN
cmp $0x10, LEN
@ -499,7 +500,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc)
* void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
FRAME_BEGIN
/* load the state: */
@ -556,7 +557,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
* void crypto_aegis128_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
SYM_FUNC_START(crypto_aegis128_aesni_dec)
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
FRAME_BEGIN
cmp $0x10, LEN
@ -653,7 +654,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec)
* void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
FRAME_BEGIN
/* load the state: */

View File

@ -7,6 +7,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>
/* struct aria_ctx: */
@ -913,7 +914,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
RET;
SYM_FUNC_END(__aria_aesni_avx_crypt_16way)
SYM_FUNC_START(aria_aesni_avx_encrypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@ -938,7 +939,7 @@ SYM_FUNC_START(aria_aesni_avx_encrypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_encrypt_16way)
SYM_FUNC_START(aria_aesni_avx_decrypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@ -1039,7 +1040,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way)
RET;
SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way)
SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
/* input:
* %rdi: ctx
* %rsi: dst
@ -1208,7 +1209,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
RET;
SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way)
SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@ -1233,7 +1234,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way)
SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@ -1258,7 +1259,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
RET;
SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way)
SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
/* input:
* %rdi: ctx
* %rsi: dst

View File

@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#define PASS0_SUMS %ymm0
#define PASS1_SUMS %ymm1
@ -65,11 +66,11 @@
/*
* void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
* u8 hash[NH_HASH_BYTES])
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
SYM_FUNC_START(nh_avx2)
SYM_TYPED_FUNC_START(nh_avx2)
vmovdqu 0x00(KEY), K0
vmovdqu 0x10(KEY), K1

View File

@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#define PASS0_SUMS %xmm0
#define PASS1_SUMS %xmm1
@ -67,11 +68,11 @@
/*
* void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
* u8 hash[NH_HASH_BYTES])
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
SYM_FUNC_START(nh_sse2)
SYM_TYPED_FUNC_START(nh_sse2)
movdqu 0x00(KEY), K0
movdqu 0x10(KEY), K1

View File

@ -14,14 +14,7 @@
#include <asm/simd.h>
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES])
{
nh_avx2(key, message, message_len, (u8 *)hash);
}
__le64 hash[NH_NUM_PASSES]);
static int nhpoly1305_avx2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
@ -33,7 +26,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2);
kernel_fpu_end();
src += n;
srclen -= n;

View File

@ -14,14 +14,7 @@
#include <asm/simd.h>
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len,
__le64 hash[NH_NUM_PASSES])
{
nh_sse2(key, message, message_len, (u8 *)hash);
}
__le64 hash[NH_NUM_PASSES]);
static int nhpoly1305_sse2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
@ -33,7 +26,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2);
kernel_fpu_end();
src += n;
srclen -= n;

View File

@ -54,6 +54,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#define DIGEST_PTR %rdi /* 1st arg */
#define DATA_PTR %rsi /* 2nd arg */
@ -93,7 +94,7 @@
*/
.text
.align 32
SYM_FUNC_START(sha1_ni_transform)
SYM_TYPED_FUNC_START(sha1_ni_transform)
push %rbp
mov %rsp, %rbp
sub $FRAME_SIZE, %rsp

View File

@ -25,6 +25,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#define CTX %rdi // arg1
#define BUF %rsi // arg2
@ -67,7 +68,7 @@
* param: function's name
*/
.macro SHA1_VECTOR_ASM name
SYM_FUNC_START(\name)
SYM_TYPED_FUNC_START(\name)
push %rbx
push %r12

View File

@ -48,6 +48,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
## assume buffers not aligned
#define VMOVDQ vmovdqu
@ -346,7 +347,7 @@ a = TMP_
## arg 3 : Num blocks
########################################################################
.text
SYM_FUNC_START(sha256_transform_avx)
SYM_TYPED_FUNC_START(sha256_transform_avx)
.align 32
pushq %rbx
pushq %r12

View File

@ -49,6 +49,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
## assume buffers not aligned
#define VMOVDQ vmovdqu
@ -523,7 +524,7 @@ STACK_SIZE = _CTX + _CTX_SIZE
## arg 3 : Num blocks
########################################################################
.text
SYM_FUNC_START(sha256_transform_rorx)
SYM_TYPED_FUNC_START(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %r12

View File

@ -47,6 +47,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
## assume buffers not aligned
#define MOVDQ movdqu
@ -355,7 +356,7 @@ a = TMP_
## arg 3 : Num blocks
########################################################################
.text
SYM_FUNC_START(sha256_transform_ssse3)
SYM_TYPED_FUNC_START(sha256_transform_ssse3)
.align 32
pushq %rbx
pushq %r12

View File

@ -54,6 +54,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#define DIGEST_PTR %rdi /* 1st arg */
#define DATA_PTR %rsi /* 2nd arg */
@ -97,7 +98,7 @@
.text
.align 32
SYM_FUNC_START(sha256_ni_transform)
SYM_TYPED_FUNC_START(sha256_ni_transform)
shl $6, NUM_BLKS /* convert to bytes */
jz .Ldone_hash

View File

@ -48,6 +48,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -273,7 +274,7 @@ frame_size = frame_WK + WK_SIZE
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks
########################################################################
SYM_FUNC_START(sha512_transform_avx)
SYM_TYPED_FUNC_START(sha512_transform_avx)
test msglen, msglen
je nowork

View File

@ -50,6 +50,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -565,7 +566,7 @@ frame_size = frame_CTX + CTX_SIZE
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks
########################################################################
SYM_FUNC_START(sha512_transform_rorx)
SYM_TYPED_FUNC_START(sha512_transform_rorx)
# Save GPRs
push %rbx
push %r12

View File

@ -48,6 +48,7 @@
########################################################################
#include <linux/linkage.h>
#include <linux/cfi_types.h>
.text
@ -274,7 +275,7 @@ frame_size = frame_WK + WK_SIZE
# of SHA512 message blocks.
# "blocks" is the message length in SHA512 blocks.
########################################################################
SYM_FUNC_START(sha512_transform_ssse3)
SYM_TYPED_FUNC_START(sha512_transform_ssse3)
test msglen, msglen
je nowork

View File

@ -12,6 +12,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>
/* Context structure */
@ -328,7 +329,7 @@
* const u8 *data, int nblocks);
*/
.align 16
SYM_FUNC_START(sm3_transform_avx)
SYM_TYPED_FUNC_START(sm3_transform_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: data (64*nblks bytes)

View File

@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>
#define rRIP (%rip)
@ -420,7 +421,7 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)
@ -495,7 +496,7 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)
@ -545,7 +546,7 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (8 blocks)

View File

@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/frame.h>
#define rRIP (%rip)
@ -282,7 +283,7 @@ SYM_FUNC_END(__sm4_crypt_blk16)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
@ -395,7 +396,7 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)
@ -449,7 +450,7 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
* const u8 *src, u8 *iv)
*/
.align 8
SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
/* input:
* %rdi: round key array, CTX
* %rsi: dst (16 blocks)

View File

@ -38,8 +38,8 @@
* Third Edition.
*/
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>

View File

@ -175,9 +175,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
config CRYPTO_GF128MUL
tristate
config CRYPTO_NULL
tristate "Null algorithms"
select CRYPTO_NULL2
@ -714,9 +711,9 @@ config CRYPTO_KEYWRAP
config CRYPTO_LRW
tristate "LRW (Liskov Rivest Wagner)"
select CRYPTO_LIB_GF128MUL
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
select CRYPTO_GF128MUL
select CRYPTO_ECB
help
LRW (Liskov Rivest Wagner) mode
@ -926,8 +923,8 @@ config CRYPTO_CMAC
config CRYPTO_GHASH
tristate "GHASH"
select CRYPTO_GF128MUL
select CRYPTO_HASH
select CRYPTO_LIB_GF128MUL
help
GCM GHASH function (NIST SP800-38D)
@ -967,8 +964,8 @@ config CRYPTO_MICHAEL_MIC
config CRYPTO_POLYVAL
tristate
select CRYPTO_GF128MUL
select CRYPTO_HASH
select CRYPTO_LIB_GF128MUL
help
POLYVAL hash function for HCTR2

View File

@ -85,7 +85,6 @@ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
obj-$(CONFIG_CRYPTO_CFB) += cfb.o

View File

@ -48,11 +48,11 @@
*/
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>

View File

@ -6,7 +6,7 @@
*/
#include <crypto/aes.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/module.h>
static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,

View File

@ -12,6 +12,8 @@
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/key.h>
#include <linux/key-type.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/net.h>
@ -19,6 +21,10 @@
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/security.h>
#include <linux/string.h>
#include <keys/user-type.h>
#include <keys/trusted-type.h>
#include <keys/encrypted-type.h>
struct alg_type_list {
const struct af_alg_type *type;
@ -222,6 +228,129 @@ static int alg_setkey(struct sock *sk, sockptr_t ukey, unsigned int keylen)
return err;
}
#ifdef CONFIG_KEYS
static const u8 *key_data_ptr_user(const struct key *key,
unsigned int *datalen)
{
const struct user_key_payload *ukp;
ukp = user_key_payload_locked(key);
if (IS_ERR_OR_NULL(ukp))
return ERR_PTR(-EKEYREVOKED);
*datalen = key->datalen;
return ukp->data;
}
static const u8 *key_data_ptr_encrypted(const struct key *key,
unsigned int *datalen)
{
const struct encrypted_key_payload *ekp;
ekp = dereference_key_locked(key);
if (IS_ERR_OR_NULL(ekp))
return ERR_PTR(-EKEYREVOKED);
*datalen = ekp->decrypted_datalen;
return ekp->decrypted_data;
}
static const u8 *key_data_ptr_trusted(const struct key *key,
unsigned int *datalen)
{
const struct trusted_key_payload *tkp;
tkp = dereference_key_locked(key);
if (IS_ERR_OR_NULL(tkp))
return ERR_PTR(-EKEYREVOKED);
*datalen = tkp->key_len;
return tkp->key;
}
static struct key *lookup_key(key_serial_t serial)
{
key_ref_t key_ref;
key_ref = lookup_user_key(serial, 0, KEY_NEED_SEARCH);
if (IS_ERR(key_ref))
return ERR_CAST(key_ref);
return key_ref_to_ptr(key_ref);
}
static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval,
unsigned int optlen)
{
const struct af_alg_type *type = ask->type;
u8 *key_data = NULL;
unsigned int key_datalen;
key_serial_t serial;
struct key *key;
const u8 *ret;
int err;
if (optlen != sizeof(serial))
return -EINVAL;
if (copy_from_sockptr(&serial, optval, optlen))
return -EFAULT;
key = lookup_key(serial);
if (IS_ERR(key))
return PTR_ERR(key);
down_read(&key->sem);
ret = ERR_PTR(-ENOPROTOOPT);
if (!strcmp(key->type->name, "user") ||
!strcmp(key->type->name, "logon")) {
ret = key_data_ptr_user(key, &key_datalen);
} else if (IS_REACHABLE(CONFIG_ENCRYPTED_KEYS) &&
!strcmp(key->type->name, "encrypted")) {
ret = key_data_ptr_encrypted(key, &key_datalen);
} else if (IS_REACHABLE(CONFIG_TRUSTED_KEYS) &&
!strcmp(key->type->name, "trusted")) {
ret = key_data_ptr_trusted(key, &key_datalen);
}
if (IS_ERR(ret)) {
up_read(&key->sem);
return PTR_ERR(ret);
}
key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL);
if (!key_data) {
up_read(&key->sem);
return -ENOMEM;
}
memcpy(key_data, ret, key_datalen);
up_read(&key->sem);
err = type->setkey(ask->private, key_data, key_datalen);
sock_kzfree_s(&ask->sk, key_data, key_datalen);
return err;
}
#else
static inline int alg_setkey_by_key_serial(struct alg_sock *ask,
sockptr_t optval,
unsigned int optlen)
{
return -ENOPROTOOPT;
}
#endif
static int alg_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@ -242,12 +371,16 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case ALG_SET_KEY:
case ALG_SET_KEY_BY_KEY_SERIAL:
if (sock->state == SS_CONNECTED)
goto unlock;
if (!type->setkey)
goto unlock;
err = alg_setkey(sk, optval, optlen);
if (optname == ALG_SET_KEY_BY_KEY_SERIAL)
err = alg_setkey_by_key_serial(ask, optval, optlen);
else
err = alg_setkey(sk, optval, optlen);
break;
case ALG_SET_AEAD_AUTHSIZE:
if (sock->state == SS_CONNECTED)
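
For context, the new ALG_SET_KEY_BY_KEY_SERIAL path is driven from userspace by handing af_alg a keyring serial instead of raw key bytes. A minimal sketch, assuming libkeyutils for add_key() and a <linux/if_alg.h> that already carries the ALG_SET_KEY_BY_KEY_SERIAL definition from elsewhere in this series; error handling is omitted and the key bytes and names are purely illustrative:

#include <keyutils.h>
#include <linux/if_alg.h>
#include <sys/socket.h>
#include <unistd.h>

/* build: cc af_alg_key_serial_demo.c -lkeyutils */
int main(void)
{
	static const unsigned char demo_key[16] = { 0 };	/* illustrative key */
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "cbc(aes)",
	};
	key_serial_t serial;
	int tfmfd, opfd;

	/* Put the key into the kernel keyring; only its serial crosses af_alg. */
	serial = add_key("user", "af_alg_demo_key", demo_key, sizeof(demo_key),
			 KEY_SPEC_PROCESS_KEYRING);

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));

	/* optlen must be exactly sizeof(key_serial_t), as the hunk above checks */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY_BY_KEY_SERIAL,
		   &serial, sizeof(serial));

	opfd = accept(tfmfd, NULL, NULL);	/* operation socket, used as with ALG_SET_KEY */

	/* ... ALG_OP_ENCRYPT/ALG_OP_DECRYPT traffic on opfd as usual ... */

	close(opfd);
	close(tfmfd);
	return 0;
}

The key types accepted mirror the lookup in the hunk above: "user" and "logon" directly, plus "trusted" and "encrypted" when those key types are reachable in the running kernel.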

View File

@ -222,12 +222,65 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
}
EXPORT_SYMBOL_GPL(crypto_remove_spawns);
static void crypto_alg_finish_registration(struct crypto_alg *alg,
bool fulfill_requests,
struct list_head *algs_to_put)
{
struct crypto_alg *q;
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (q == alg)
continue;
if (crypto_is_moribund(q))
continue;
if (crypto_is_larval(q)) {
struct crypto_larval *larval = (void *)q;
/*
* Check to see if either our generic name or
* specific name can satisfy the name requested
* by the larval entry q.
*/
if (strcmp(alg->cra_name, q->cra_name) &&
strcmp(alg->cra_driver_name, q->cra_name))
continue;
if (larval->adult)
continue;
if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
continue;
if (fulfill_requests && crypto_mod_get(alg))
larval->adult = alg;
else
larval->adult = ERR_PTR(-EAGAIN);
continue;
}
if (strcmp(alg->cra_name, q->cra_name))
continue;
if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
q->cra_priority > alg->cra_priority)
continue;
crypto_remove_spawns(q, algs_to_put, alg);
}
crypto_notify(CRYPTO_MSG_ALG_LOADED, alg);
}
static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
{
struct crypto_larval *larval;
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER))
return NULL;
if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) ||
IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) ||
(alg->cra_flags & CRYPTO_ALG_INTERNAL))
return NULL; /* No self-test needed */
larval = crypto_larval_alloc(alg->cra_name,
alg->cra_flags | CRYPTO_ALG_TESTED, 0);
@ -248,7 +301,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
return larval;
}
static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
static struct crypto_larval *
__crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put)
{
struct crypto_alg *q;
struct crypto_larval *larval;
@ -259,9 +313,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
INIT_LIST_HEAD(&alg->cra_users);
/* No cheating! */
alg->cra_flags &= ~CRYPTO_ALG_TESTED;
ret = -EEXIST;
list_for_each_entry(q, &crypto_alg_list, cra_list) {
@ -288,13 +339,18 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
list_add(&alg->cra_list, &crypto_alg_list);
if (larval)
list_add(&larval->alg.cra_list, &crypto_alg_list);
else
alg->cra_flags |= CRYPTO_ALG_TESTED;
crypto_stats_init(alg);
if (larval) {
/* No cheating! */
alg->cra_flags &= ~CRYPTO_ALG_TESTED;
list_add(&larval->alg.cra_list, &crypto_alg_list);
} else {
alg->cra_flags |= CRYPTO_ALG_TESTED;
crypto_alg_finish_registration(alg, true, algs_to_put);
}
out:
return larval;
@ -341,7 +397,10 @@ void crypto_alg_tested(const char *name, int err)
alg->cra_flags |= CRYPTO_ALG_TESTED;
/* Only satisfy larval waiters if we are the best. */
/*
* If a higher-priority implementation of the same algorithm is
* currently being tested, then don't fulfill request larvals.
*/
best = true;
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (crypto_is_moribund(q) || !crypto_is_larval(q))
@ -356,47 +415,7 @@ void crypto_alg_tested(const char *name, int err)
}
}
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (q == alg)
continue;
if (crypto_is_moribund(q))
continue;
if (crypto_is_larval(q)) {
struct crypto_larval *larval = (void *)q;
/*
* Check to see if either our generic name or
* specific name can satisfy the name requested
* by the larval entry q.
*/
if (strcmp(alg->cra_name, q->cra_name) &&
strcmp(alg->cra_driver_name, q->cra_name))
continue;
if (larval->adult)
continue;
if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
continue;
if (best && crypto_mod_get(alg))
larval->adult = alg;
else
larval->adult = ERR_PTR(-EAGAIN);
continue;
}
if (strcmp(alg->cra_name, q->cra_name))
continue;
if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
q->cra_priority > alg->cra_priority)
continue;
crypto_remove_spawns(q, &list, alg);
}
crypto_alg_finish_registration(alg, best, &list);
complete:
complete_all(&test->completion);
@ -423,7 +442,8 @@ EXPORT_SYMBOL_GPL(crypto_remove_final);
int crypto_register_alg(struct crypto_alg *alg)
{
struct crypto_larval *larval;
bool test_started;
LIST_HEAD(algs_to_put);
bool test_started = false;
int err;
alg->cra_flags &= ~CRYPTO_ALG_DEAD;
@ -432,17 +452,18 @@ int crypto_register_alg(struct crypto_alg *alg)
return err;
down_write(&crypto_alg_sem);
larval = __crypto_register_alg(alg);
test_started = static_key_enabled(&crypto_boot_test_finished);
if (!IS_ERR_OR_NULL(larval))
larval = __crypto_register_alg(alg, &algs_to_put);
if (!IS_ERR_OR_NULL(larval)) {
test_started = crypto_boot_test_finished();
larval->test_started = test_started;
}
up_write(&crypto_alg_sem);
if (IS_ERR_OR_NULL(larval))
if (IS_ERR(larval))
return PTR_ERR(larval);
if (test_started)
crypto_wait_for_test(larval);
crypto_remove_final(&algs_to_put);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_register_alg);
@ -619,6 +640,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
struct crypto_larval *larval;
struct crypto_spawn *spawn;
u32 fips_internal = 0;
LIST_HEAD(algs_to_put);
int err;
err = crypto_check_alg(&inst->alg);
@ -650,7 +672,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);
larval = __crypto_register_alg(&inst->alg);
larval = __crypto_register_alg(&inst->alg, &algs_to_put);
if (IS_ERR(larval))
goto unlock;
else if (larval)
@ -662,15 +684,12 @@ int crypto_register_instance(struct crypto_template *tmpl,
unlock:
up_write(&crypto_alg_sem);
err = PTR_ERR(larval);
if (IS_ERR_OR_NULL(larval))
goto err;
crypto_wait_for_test(larval);
err = 0;
err:
return err;
if (IS_ERR(larval))
return PTR_ERR(larval);
if (larval)
crypto_wait_for_test(larval);
crypto_remove_final(&algs_to_put);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_register_instance);
@ -1234,6 +1253,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt);
static void __init crypto_start_tests(void)
{
if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
return;
for (;;) {
struct crypto_larval *larval = NULL;
struct crypto_alg *q;
@ -1267,7 +1289,7 @@ static void __init crypto_start_tests(void)
crypto_wait_for_test(larval);
}
static_branch_enable(&crypto_boot_test_finished);
set_crypto_boot_test_finished();
}
static int __init crypto_algapi_init(void)

View File

@ -175,18 +175,10 @@ static int cryptomgr_test(void *data)
{
struct crypto_test_param *param = data;
u32 type = param->type;
int err = 0;
#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
goto skiptest;
#endif
if (type & CRYPTO_ALG_TESTED)
goto skiptest;
int err;
err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED);
skiptest:
crypto_alg_tested(param->driver, err);
kfree(param);
@ -197,7 +189,9 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
{
struct task_struct *thread;
struct crypto_test_param *param;
u32 type;
if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
return NOTIFY_DONE;
if (!try_module_get(THIS_MODULE))
goto err;
@ -208,13 +202,7 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
memcpy(param->driver, alg->cra_driver_name, sizeof(param->driver));
memcpy(param->alg, alg->cra_name, sizeof(param->alg));
type = alg->cra_flags;
/* Do not test internal algorithms. */
if (type & CRYPTO_ALG_INTERNAL)
type |= CRYPTO_ALG_TESTED;
param->type = type;
param->type = alg->cra_flags;
thread = kthread_run(cryptomgr_test, param, "cryptomgr_test");
if (IS_ERR(thread))

View File

@ -29,11 +29,11 @@
*
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/byteorder.h>
#include <linux/crypto.h>
#include <linux/types.h>
#define ANUBIS_MIN_KEY_SIZE 16

View File

@ -31,8 +31,10 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem);
BLOCKING_NOTIFIER_HEAD(crypto_chain);
EXPORT_SYMBOL_GPL(crypto_chain);
DEFINE_STATIC_KEY_FALSE(crypto_boot_test_finished);
EXPORT_SYMBOL_GPL(crypto_boot_test_finished);
#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished);
EXPORT_SYMBOL_GPL(__crypto_boot_test_finished);
#endif
static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
@ -172,9 +174,6 @@ void crypto_wait_for_test(struct crypto_larval *larval)
err = wait_for_completion_killable(&larval->completion);
WARN_ON(err);
if (!err)
crypto_notify(CRYPTO_MSG_ALG_LOADED, larval);
out:
crypto_larval_kill(&larval->alg);
}
@ -205,7 +204,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
struct crypto_larval *larval = (void *)alg;
long timeout;
if (!static_branch_likely(&crypto_boot_test_finished))
if (!crypto_boot_test_finished())
crypto_start_test(larval);
timeout = wait_for_completion_killable_timeout(

View File

@ -14,11 +14,12 @@
* Copyright (c) Kyle McMartin <kyle@debian.org>
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/byteorder.h>
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/blowfish.h>

View File

@ -11,11 +11,12 @@
* Copyright (c) Kyle McMartin <kyle@debian.org>
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/unaligned.h>
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/blowfish.h>

View File

@ -9,7 +9,7 @@
* https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
*/
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>

View File

@ -14,8 +14,8 @@
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/string.h>

View File

@ -11,8 +11,8 @@
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/string.h>

View File

@ -218,7 +218,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
cryptlen += ilen;
}
ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
ahash_request_set_crypt(ahreq, plain, odata, cryptlen);
err = crypto_ahash_finup(ahreq);
out:
return err;

View File

@ -68,11 +68,12 @@ struct aead_instance_ctx {
struct cryptd_skcipher_ctx {
refcount_t refcnt;
struct crypto_sync_skcipher *child;
struct crypto_skcipher *child;
};
struct cryptd_skcipher_request_ctx {
crypto_completion_t complete;
struct skcipher_request req;
};
struct cryptd_hash_ctx {
@ -227,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
const u8 *key, unsigned int keylen)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
struct crypto_sync_skcipher *child = ctx->child;
struct crypto_skcipher *child = ctx->child;
crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_sync_skcipher_set_flags(child,
crypto_skcipher_get_flags(parent) &
CRYPTO_TFM_REQ_MASK);
return crypto_sync_skcipher_setkey(child, key, keylen);
crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_skcipher_set_flags(child,
crypto_skcipher_get_flags(parent) &
CRYPTO_TFM_REQ_MASK);
return crypto_skcipher_setkey(child, key, keylen);
}
static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
@ -258,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_sync_skcipher *child = ctx->child;
SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
struct skcipher_request *subreq = &rctx->req;
struct crypto_skcipher *child = ctx->child;
if (unlikely(err == -EINPROGRESS))
goto out;
skcipher_request_set_sync_tfm(subreq, child);
skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@ -286,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_sync_skcipher *child = ctx->child;
SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
struct skcipher_request *subreq = &rctx->req;
struct crypto_skcipher *child = ctx->child;
if (unlikely(err == -EINPROGRESS))
goto out;
skcipher_request_set_sync_tfm(subreq, child);
skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@ -343,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
if (IS_ERR(cipher))
return PTR_ERR(cipher);
ctx->child = (struct crypto_sync_skcipher *)cipher;
ctx->child = cipher;
crypto_skcipher_set_reqsize(
tfm, sizeof(struct cryptd_skcipher_request_ctx));
tfm, sizeof(struct cryptd_skcipher_request_ctx) +
crypto_skcipher_reqsize(cipher));
return 0;
}
@ -353,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
crypto_free_sync_skcipher(ctx->child);
crypto_free_skcipher(ctx->child);
}
static void cryptd_skcipher_free(struct skcipher_instance *inst)
@ -931,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
return &ctx->child->base;
return ctx->child;
}
EXPORT_SYMBOL_GPL(cryptd_skcipher_child);

View File

@ -8,11 +8,11 @@
*/
#include <asm/byteorder.h>
#include <crypto/algapi.h>
#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <crypto/internal/des.h>

View File

@ -318,6 +318,9 @@ static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
if (IS_ERR(tfm_ctx->dh_tfm))
return PTR_ERR(tfm_ctx->dh_tfm);
kpp_set_reqsize(tfm, sizeof(struct kpp_request) +
crypto_kpp_reqsize(tfm_ctx->dh_tfm));
return 0;
}
@ -593,7 +596,6 @@ static int __maybe_unused __dh_safe_prime_create(
inst->alg.max_size = dh_safe_prime_max_size;
inst->alg.init = dh_safe_prime_init_tfm;
inst->alg.exit = dh_safe_prime_exit_tfm;
inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize;
inst->alg.base.cra_priority = dh_alg->base.cra_priority;
inst->alg.base.cra_module = THIS_MODULE;
inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx);

View File

@ -43,10 +43,10 @@
*/
#include <asm/byteorder.h>
#include <crypto/algapi.h>
#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/crypto.h>
#define ROUNDS 16

View File

@ -47,7 +47,25 @@ extern struct list_head crypto_alg_list;
extern struct rw_semaphore crypto_alg_sem;
extern struct blocking_notifier_head crypto_chain;
DECLARE_STATIC_KEY_FALSE(crypto_boot_test_finished);
#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
static inline bool crypto_boot_test_finished(void)
{
return true;
}
static inline void set_crypto_boot_test_finished(void)
{
}
#else
DECLARE_STATIC_KEY_FALSE(__crypto_boot_test_finished);
static inline bool crypto_boot_test_finished(void)
{
return static_branch_likely(&__crypto_boot_test_finished);
}
static inline void set_crypto_boot_test_finished(void)
{
static_branch_enable(&__crypto_boot_test_finished);
}
#endif /* !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
#ifdef CONFIG_PROC_FS
void __init crypto_init_proc(void);

View File

@ -125,9 +125,13 @@ static const struct kdf_testvec kdf_ctr_hmac_sha256_tv_template[] = {
static int __init crypto_kdf108_init(void)
{
int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)",
crypto_kdf108_setkey, crypto_kdf108_ctr_generate);
int ret;
if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
return 0;
ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)",
crypto_kdf108_setkey, crypto_kdf108_ctr_generate);
if (ret) {
if (fips_enabled)
panic("alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n",
@ -136,7 +140,7 @@ static int __init crypto_kdf108_init(void)
WARN(1,
"alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n",
ret);
} else {
} else if (fips_enabled) {
pr_info("alg: self-tests for CTR-KDF (hmac(sha256)) passed\n");
}

View File

@ -19,11 +19,11 @@
*
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/byteorder.h>
#include <linux/crypto.h>
#include <linux/types.h>
#define KHAZAD_KEY_SIZE 16

View File

@ -579,6 +579,10 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm)
return PTR_ERR(child_tfm);
ctx->child = child_tfm;
akcipher_set_reqsize(tfm, sizeof(struct pkcs1pad_request) +
crypto_akcipher_reqsize(child_tfm));
return 0;
}
@ -674,7 +678,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.set_pub_key = pkcs1pad_set_pub_key;
inst->alg.set_priv_key = pkcs1pad_set_priv_key;
inst->alg.max_size = pkcs1pad_get_max_size;
inst->alg.reqsize = sizeof(struct pkcs1pad_request) + rsa_alg->reqsize;
inst->free = pkcs1pad_free;

View File

@ -8,11 +8,11 @@
* Copyright (C) 2007 Korea Information Security Agency (KISA).
*/
#include <crypto/algapi.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#define SEED_NUM_KCONSTANTS 16

View File

@ -7,11 +7,11 @@
* Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <asm/unaligned.h>
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/serpent.h>

View File

@ -18,26 +18,16 @@
#include "internal.h"
#define MAX_SHASH_ALIGNMASK 63
static const struct crypto_type crypto_shash_type;
static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
{
return -ENOSYS;
}
/*
* Check whether an shash algorithm has a setkey function.
*
* For CFI compatibility, this must not be an inline function. This is because
* when CFI is enabled, modules won't get the same address for shash_no_setkey
* (if it were exported, which inlining would require) as the core kernel will.
*/
bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
{
return alg->setkey != shash_no_setkey;
}
EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey);
EXPORT_SYMBOL_GPL(shash_no_setkey);
static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
@ -100,7 +90,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
* We cannot count on __aligned() working for large values:
* https://patchwork.kernel.org/patch/9507697/
*/
u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
u8 ubuf[MAX_SHASH_ALIGNMASK * 2];
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
@ -142,7 +132,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
* We cannot count on __aligned() working for large values:
* https://patchwork.kernel.org/patch/9507697/
*/
u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
u8 ubuf[MAX_SHASH_ALIGNMASK + HASH_MAX_DIGESTSIZE];
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
int err;
@ -536,6 +526,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
alg->statesize > HASH_MAX_STATESIZE)
return -EINVAL;
if (base->cra_alignmask > MAX_SHASH_ALIGNMASK)
return -EINVAL;
if ((alg->export && !alg->import) || (alg->import && !alg->export))
return -EINVAL;

View File

@ -763,7 +763,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
struct crypto_skcipher *tfm;
/* Only sync algorithms allowed. */
mask |= CRYPTO_ALG_ASYNC;
mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE;
tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);

View File

@ -7,12 +7,12 @@
* All rights reserved.
*/
#include <crypto/algapi.h>
#include <crypto/sm4.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>

View File

@ -324,7 +324,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
crypto_req_done, &data[i].wait);
}
pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
i = 0;
@ -506,8 +506,8 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
out:
if (ret == 0)
printk("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / 8, blen);
pr_cont("1 operation in %lu cycles (%d bytes)\n",
(cycles + 4) / 8, blen);
return ret;
}
@ -575,8 +575,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
}
crypto_init_wait(&wait);
printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
pr_info("testing speed of %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
req = aead_request_alloc(tfm, GFP_KERNEL);
if (!req) {
@ -624,8 +624,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
memset(iv, 0xff, iv_len);
crypto_aead_clear_flags(tfm, ~0);
printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
pr_info("test %u (%d bit key, %d byte blocks): ",
i, *keysize * 8, bs);
memset(tvmem[0], 0xff, PAGE_SIZE);
@ -727,8 +727,8 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
return ret;
}
printk("%6u opers/sec, %9lu bytes/sec\n",
bcount / secs, ((long)bcount * blen) / secs);
pr_cont("%6u opers/sec, %9lu bytes/sec\n",
bcount / secs, ((long)bcount * blen) / secs);
return 0;
}
@ -877,8 +877,8 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
return;
}
printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo,
get_driver_name(crypto_ahash, tfm));
pr_info("testing speed of async %s (%s)\n", algo,
get_driver_name(crypto_ahash, tfm));
if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) {
pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm),
@ -1090,15 +1090,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
goto out_free_tfm;
}
for (i = 0; i < num_mb; ++i)
if (testmgr_alloc_buf(data[i].xbuf)) {
while (i--)
testmgr_free_buf(data[i].xbuf);
goto out_free_tfm;
}
for (i = 0; i < num_mb; ++i) {
data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL);
if (!data[i].req) {
@ -1117,7 +1108,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
crypto_init_wait(&data[i].wait);
}
pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
get_driver_name(crypto_skcipher, tfm), e);
i = 0;
@ -1324,13 +1315,12 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
return;
}
pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync",
pr_info("testing speed of %s %s (%s) %s\n", async ? "async" : "sync",
algo, get_driver_name(crypto_skcipher, tfm), e);
req = skcipher_request_alloc(tfm, GFP_KERNEL);
if (!req) {
pr_err("tcrypt: skcipher: Failed to allocate request for %s\n",
algo);
pr_err("skcipher: Failed to allocate request for %s\n", algo);
goto out;
}
@ -1471,387 +1461,396 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
}
for (i = 1; i < 200; i++)
ret += do_test(NULL, 0, 0, i, num_mb);
ret = min(ret, do_test(NULL, 0, 0, i, num_mb));
break;
case 1:
ret += tcrypt_test("md5");
ret = min(ret, tcrypt_test("md5"));
break;
case 2:
ret += tcrypt_test("sha1");
ret = min(ret, tcrypt_test("sha1"));
break;
case 3:
ret += tcrypt_test("ecb(des)");
ret += tcrypt_test("cbc(des)");
ret += tcrypt_test("ctr(des)");
ret = min(ret, tcrypt_test("ecb(des)"));
ret = min(ret, tcrypt_test("cbc(des)"));
ret = min(ret, tcrypt_test("ctr(des)"));
break;
case 4:
ret += tcrypt_test("ecb(des3_ede)");
ret += tcrypt_test("cbc(des3_ede)");
ret += tcrypt_test("ctr(des3_ede)");
ret = min(ret, tcrypt_test("ecb(des3_ede)"));
ret = min(ret, tcrypt_test("cbc(des3_ede)"));
ret = min(ret, tcrypt_test("ctr(des3_ede)"));
break;
case 5:
ret += tcrypt_test("md4");
ret = min(ret, tcrypt_test("md4"));
break;
case 6:
ret += tcrypt_test("sha256");
ret = min(ret, tcrypt_test("sha256"));
break;
case 7:
ret += tcrypt_test("ecb(blowfish)");
ret += tcrypt_test("cbc(blowfish)");
ret += tcrypt_test("ctr(blowfish)");
ret = min(ret, tcrypt_test("ecb(blowfish)"));
ret = min(ret, tcrypt_test("cbc(blowfish)"));
ret = min(ret, tcrypt_test("ctr(blowfish)"));
break;
case 8:
ret += tcrypt_test("ecb(twofish)");
ret += tcrypt_test("cbc(twofish)");
ret += tcrypt_test("ctr(twofish)");
ret += tcrypt_test("lrw(twofish)");
ret += tcrypt_test("xts(twofish)");
ret = min(ret, tcrypt_test("ecb(twofish)"));
ret = min(ret, tcrypt_test("cbc(twofish)"));
ret = min(ret, tcrypt_test("ctr(twofish)"));
ret = min(ret, tcrypt_test("lrw(twofish)"));
ret = min(ret, tcrypt_test("xts(twofish)"));
break;
case 9:
ret += tcrypt_test("ecb(serpent)");
ret += tcrypt_test("cbc(serpent)");
ret += tcrypt_test("ctr(serpent)");
ret += tcrypt_test("lrw(serpent)");
ret += tcrypt_test("xts(serpent)");
ret = min(ret, tcrypt_test("ecb(serpent)"));
ret = min(ret, tcrypt_test("cbc(serpent)"));
ret = min(ret, tcrypt_test("ctr(serpent)"));
ret = min(ret, tcrypt_test("lrw(serpent)"));
ret = min(ret, tcrypt_test("xts(serpent)"));
break;
case 10:
ret += tcrypt_test("ecb(aes)");
ret += tcrypt_test("cbc(aes)");
ret += tcrypt_test("lrw(aes)");
ret += tcrypt_test("xts(aes)");
ret += tcrypt_test("ctr(aes)");
ret += tcrypt_test("rfc3686(ctr(aes))");
ret += tcrypt_test("ofb(aes)");
ret += tcrypt_test("cfb(aes)");
ret += tcrypt_test("xctr(aes)");
ret = min(ret, tcrypt_test("ecb(aes)"));
ret = min(ret, tcrypt_test("cbc(aes)"));
ret = min(ret, tcrypt_test("lrw(aes)"));
ret = min(ret, tcrypt_test("xts(aes)"));
ret = min(ret, tcrypt_test("ctr(aes)"));
ret = min(ret, tcrypt_test("rfc3686(ctr(aes))"));
ret = min(ret, tcrypt_test("ofb(aes)"));
ret = min(ret, tcrypt_test("cfb(aes)"));
ret = min(ret, tcrypt_test("xctr(aes)"));
break;
case 11:
ret += tcrypt_test("sha384");
ret = min(ret, tcrypt_test("sha384"));
break;
case 12:
ret += tcrypt_test("sha512");
ret = min(ret, tcrypt_test("sha512"));
break;
case 13:
ret += tcrypt_test("deflate");
ret = min(ret, tcrypt_test("deflate"));
break;
case 14:
ret += tcrypt_test("ecb(cast5)");
ret += tcrypt_test("cbc(cast5)");
ret += tcrypt_test("ctr(cast5)");
ret = min(ret, tcrypt_test("ecb(cast5)"));
ret = min(ret, tcrypt_test("cbc(cast5)"));
ret = min(ret, tcrypt_test("ctr(cast5)"));
break;
case 15:
ret += tcrypt_test("ecb(cast6)");
ret += tcrypt_test("cbc(cast6)");
ret += tcrypt_test("ctr(cast6)");
ret += tcrypt_test("lrw(cast6)");
ret += tcrypt_test("xts(cast6)");
ret = min(ret, tcrypt_test("ecb(cast6)"));
ret = min(ret, tcrypt_test("cbc(cast6)"));
ret = min(ret, tcrypt_test("ctr(cast6)"));
ret = min(ret, tcrypt_test("lrw(cast6)"));
ret = min(ret, tcrypt_test("xts(cast6)"));
break;
case 16:
ret += tcrypt_test("ecb(arc4)");
ret = min(ret, tcrypt_test("ecb(arc4)"));
break;
case 17:
ret += tcrypt_test("michael_mic");
ret = min(ret, tcrypt_test("michael_mic"));
break;
case 18:
ret += tcrypt_test("crc32c");
ret = min(ret, tcrypt_test("crc32c"));
break;
case 19:
ret += tcrypt_test("ecb(tea)");
ret = min(ret, tcrypt_test("ecb(tea)"));
break;
case 20:
ret += tcrypt_test("ecb(xtea)");
ret = min(ret, tcrypt_test("ecb(xtea)"));
break;
case 21:
ret += tcrypt_test("ecb(khazad)");
ret = min(ret, tcrypt_test("ecb(khazad)"));
break;
case 22:
ret += tcrypt_test("wp512");
ret = min(ret, tcrypt_test("wp512"));
break;
case 23:
ret += tcrypt_test("wp384");
ret = min(ret, tcrypt_test("wp384"));
break;
case 24:
ret += tcrypt_test("wp256");
ret = min(ret, tcrypt_test("wp256"));
break;
case 26:
ret += tcrypt_test("ecb(anubis)");
ret += tcrypt_test("cbc(anubis)");
ret = min(ret, tcrypt_test("ecb(anubis)"));
ret = min(ret, tcrypt_test("cbc(anubis)"));
break;
case 30:
ret += tcrypt_test("ecb(xeta)");
ret = min(ret, tcrypt_test("ecb(xeta)"));
break;
case 31:
ret += tcrypt_test("pcbc(fcrypt)");
ret = min(ret, tcrypt_test("pcbc(fcrypt)"));
break;
case 32:
ret += tcrypt_test("ecb(camellia)");
ret += tcrypt_test("cbc(camellia)");
ret += tcrypt_test("ctr(camellia)");
ret += tcrypt_test("lrw(camellia)");
ret += tcrypt_test("xts(camellia)");
ret = min(ret, tcrypt_test("ecb(camellia)"));
ret = min(ret, tcrypt_test("cbc(camellia)"));
ret = min(ret, tcrypt_test("ctr(camellia)"));
ret = min(ret, tcrypt_test("lrw(camellia)"));
ret = min(ret, tcrypt_test("xts(camellia)"));
break;
case 33:
ret += tcrypt_test("sha224");
ret = min(ret, tcrypt_test("sha224"));
break;
case 35:
ret += tcrypt_test("gcm(aes)");
ret = min(ret, tcrypt_test("gcm(aes)"));
break;
case 36:
ret += tcrypt_test("lzo");
ret = min(ret, tcrypt_test("lzo"));
break;
case 37:
ret += tcrypt_test("ccm(aes)");
ret = min(ret, tcrypt_test("ccm(aes)"));
break;
case 38:
ret += tcrypt_test("cts(cbc(aes))");
ret = min(ret, tcrypt_test("cts(cbc(aes))"));
break;
case 39:
ret += tcrypt_test("xxhash64");
ret = min(ret, tcrypt_test("xxhash64"));
break;
case 40:
ret += tcrypt_test("rmd160");
ret = min(ret, tcrypt_test("rmd160"));
break;
case 42:
ret += tcrypt_test("blake2b-512");
ret = min(ret, tcrypt_test("blake2b-512"));
break;
case 43:
ret += tcrypt_test("ecb(seed)");
ret = min(ret, tcrypt_test("ecb(seed)"));
break;
case 45:
ret += tcrypt_test("rfc4309(ccm(aes))");
ret = min(ret, tcrypt_test("rfc4309(ccm(aes))"));
break;
case 46:
ret += tcrypt_test("ghash");
ret = min(ret, tcrypt_test("ghash"));
break;
case 47:
ret += tcrypt_test("crct10dif");
ret = min(ret, tcrypt_test("crct10dif"));
break;
case 48:
ret += tcrypt_test("sha3-224");
ret = min(ret, tcrypt_test("sha3-224"));
break;
case 49:
ret += tcrypt_test("sha3-256");
ret = min(ret, tcrypt_test("sha3-256"));
break;
case 50:
ret += tcrypt_test("sha3-384");
ret = min(ret, tcrypt_test("sha3-384"));
break;
case 51:
ret += tcrypt_test("sha3-512");
ret = min(ret, tcrypt_test("sha3-512"));
break;
case 52:
ret += tcrypt_test("sm3");
ret = min(ret, tcrypt_test("sm3"));
break;
case 53:
ret += tcrypt_test("streebog256");
ret = min(ret, tcrypt_test("streebog256"));
break;
case 54:
ret += tcrypt_test("streebog512");
ret = min(ret, tcrypt_test("streebog512"));
break;
case 55:
ret += tcrypt_test("gcm(sm4)");
ret = min(ret, tcrypt_test("gcm(sm4)"));
break;
case 56:
ret += tcrypt_test("ccm(sm4)");
ret = min(ret, tcrypt_test("ccm(sm4)"));
break;
case 57:
ret += tcrypt_test("polyval");
ret = min(ret, tcrypt_test("polyval"));
break;
case 58:
ret += tcrypt_test("gcm(aria)");
ret = min(ret, tcrypt_test("gcm(aria)"));
break;
case 59:
ret = min(ret, tcrypt_test("cts(cbc(sm4))"));
break;
case 100:
ret += tcrypt_test("hmac(md5)");
ret = min(ret, tcrypt_test("hmac(md5)"));
break;
case 101:
ret += tcrypt_test("hmac(sha1)");
ret = min(ret, tcrypt_test("hmac(sha1)"));
break;
case 102:
ret += tcrypt_test("hmac(sha256)");
ret = min(ret, tcrypt_test("hmac(sha256)"));
break;
case 103:
ret += tcrypt_test("hmac(sha384)");
ret = min(ret, tcrypt_test("hmac(sha384)"));
break;
case 104:
ret += tcrypt_test("hmac(sha512)");
ret = min(ret, tcrypt_test("hmac(sha512)"));
break;
case 105:
ret += tcrypt_test("hmac(sha224)");
ret = min(ret, tcrypt_test("hmac(sha224)"));
break;
case 106:
ret += tcrypt_test("xcbc(aes)");
ret = min(ret, tcrypt_test("xcbc(aes)"));
break;
case 108:
ret += tcrypt_test("hmac(rmd160)");
ret = min(ret, tcrypt_test("hmac(rmd160)"));
break;
case 109:
ret += tcrypt_test("vmac64(aes)");
ret = min(ret, tcrypt_test("vmac64(aes)"));
break;
case 111:
ret += tcrypt_test("hmac(sha3-224)");
ret = min(ret, tcrypt_test("hmac(sha3-224)"));
break;
case 112:
ret += tcrypt_test("hmac(sha3-256)");
ret = min(ret, tcrypt_test("hmac(sha3-256)"));
break;
case 113:
ret += tcrypt_test("hmac(sha3-384)");
ret = min(ret, tcrypt_test("hmac(sha3-384)"));
break;
case 114:
ret += tcrypt_test("hmac(sha3-512)");
ret = min(ret, tcrypt_test("hmac(sha3-512)"));
break;
case 115:
ret += tcrypt_test("hmac(streebog256)");
ret = min(ret, tcrypt_test("hmac(streebog256)"));
break;
case 116:
ret += tcrypt_test("hmac(streebog512)");
ret = min(ret, tcrypt_test("hmac(streebog512)"));
break;
case 150:
ret += tcrypt_test("ansi_cprng");
ret = min(ret, tcrypt_test("ansi_cprng"));
break;
case 151:
ret += tcrypt_test("rfc4106(gcm(aes))");
ret = min(ret, tcrypt_test("rfc4106(gcm(aes))"));
break;
case 152:
ret += tcrypt_test("rfc4543(gcm(aes))");
ret = min(ret, tcrypt_test("rfc4543(gcm(aes))"));
break;
case 153:
ret += tcrypt_test("cmac(aes)");
ret = min(ret, tcrypt_test("cmac(aes)"));
break;
case 154:
ret += tcrypt_test("cmac(des3_ede)");
ret = min(ret, tcrypt_test("cmac(des3_ede)"));
break;
case 155:
ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))");
ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(aes))"));
break;
case 156:
ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))");
ret = min(ret, tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"));
break;
case 157:
ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
ret = min(ret, tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"));
break;
case 158:
ret += tcrypt_test("cbcmac(sm4)");
ret = min(ret, tcrypt_test("cbcmac(sm4)"));
break;
case 159:
ret += tcrypt_test("cmac(sm4)");
ret = min(ret, tcrypt_test("cmac(sm4)"));
break;
case 160:
ret = min(ret, tcrypt_test("xcbc(sm4)"));
break;
case 181:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))"));
break;
case 182:
ret += tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))");
ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))"));
break;
case 183:
ret += tcrypt_test("authenc(hmac(sha224),cbc(des))");
ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des))"));
break;
case 184:
ret += tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))");
ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))"));
break;
case 185:
ret += tcrypt_test("authenc(hmac(sha256),cbc(des))");
ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des))"));
break;
case 186:
ret += tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))");
ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))"));
break;
case 187:
ret += tcrypt_test("authenc(hmac(sha384),cbc(des))");
ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des))"));
break;
case 188:
ret += tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))");
ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))"));
break;
case 189:
ret += tcrypt_test("authenc(hmac(sha512),cbc(des))");
ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des))"));
break;
case 190:
ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))"));
break;
case 191:
ret += tcrypt_test("ecb(sm4)");
ret += tcrypt_test("cbc(sm4)");
ret += tcrypt_test("cfb(sm4)");
ret += tcrypt_test("ctr(sm4)");
ret = min(ret, tcrypt_test("ecb(sm4)"));
ret = min(ret, tcrypt_test("cbc(sm4)"));
ret = min(ret, tcrypt_test("cfb(sm4)"));
ret = min(ret, tcrypt_test("ctr(sm4)"));
ret = min(ret, tcrypt_test("xts(sm4)"));
break;
case 192:
ret += tcrypt_test("ecb(aria)");
ret += tcrypt_test("cbc(aria)");
ret += tcrypt_test("cfb(aria)");
ret += tcrypt_test("ctr(aria)");
ret = min(ret, tcrypt_test("ecb(aria)"));
ret = min(ret, tcrypt_test("cbc(aria)"));
ret = min(ret, tcrypt_test("cfb(aria)"));
ret = min(ret, tcrypt_test("ctr(aria)"));
break;
case 200:
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -2109,6 +2108,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cts(cbc(sm4))", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cts(cbc(sm4))", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
@@ -2117,6 +2120,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_32);
test_cipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
speed_template_32);
break;
case 219:
@@ -2630,6 +2637,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
test_acipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_32);
test_acipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
speed_template_32);
break;
case 519:
@@ -2885,7 +2896,7 @@ static int __init tcrypt_mod_init(void)
err = do_test(alg, type, mask, mode, num_mb);
if (err) {
printk(KERN_ERR "tcrypt: one or more tests failed!\n");
pr_err("one or more tests failed!\n");
goto err_free_tv;
} else {
pr_debug("all tests passed\n");
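
The tcrypt hunks above replace error accumulation ("ret += tcrypt_test(...)") with "ret = min(ret, tcrypt_test(...))": tcrypt_test() returns 0 on success or a negative errno, so taking the minimum keeps a genuine errno from the worst failure rather than an arbitrary sum of error codes. A rough standalone sketch of the two aggregation styles, with a stub standing in for tcrypt_test():

#include <errno.h>
#include <stdio.h>

static int stub_test(int rc) { return rc; }               /* stand-in for tcrypt_test() */
static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	const int results[] = { 0, -EINVAL, -ENOENT };        /* one pass, two failures */
	int sum = 0, worst = 0;

	for (unsigned i = 0; i < sizeof(results) / sizeof(results[0]); i++) {
		sum  += stub_test(results[i]);                 /* old style: errors add up */
		worst = min_int(worst, stub_test(results[i])); /* new style: keep the worst errno */
	}

	printf("sum=%d (not a valid errno), min=%d (-EINVAL)\n", sum, worst);
	return 0;
}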


@@ -14,11 +14,11 @@
* Copyright (c) 2004 Aaron Grothe ajgrothe@yahoo.com
*/
#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/byteorder.h>
#include <linux/crypto.h>
#include <linux/types.h>
#define TEA_KEY_SIZE 16


@@ -4712,6 +4712,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "cts(cbc(paes))",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "cts(cbc(sm4))",
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(sm4_cts_tv_template)
}
}, {
.alg = "curve25519",
.test = alg_test_kpp,
@@ -5586,6 +5592,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(aes_xcbc128_tv_template)
}
}, {
.alg = "xcbc(sm4)",
.test = alg_test_hash,
.suite = {
.hash = __VECS(sm4_xcbc128_tv_template)
}
}, {
.alg = "xchacha12",
.test = alg_test_skcipher,
@@ -5640,6 +5652,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.cipher = __VECS(serpent_xts_tv_template)
}
}, {
.alg = "xts(sm4)",
.generic_driver = "xts(ecb(sm4-generic))",
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(sm4_xts_tv_template)
}
}, {
.alg = "xts(twofish)",
.generic_driver = "xts(ecb(twofish-generic))",

File diff suppressed because it is too large.


@@ -25,9 +25,9 @@
* Third Edition.
*/
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <linux/bitops.h>
#include <linux/crypto.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>


@@ -25,12 +25,12 @@
*/
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <linux/bitops.h>
/* Macros to compute the g() function in the encryption and decryption


@@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void)
found:
err = pci_read_config_dword(pdev, 0x58, &pmbase);
if (err)
return err;
goto put_dev;
pmbase &= 0x0000FF00;
if (pmbase == 0)
return -EIO;
if (pmbase == 0) {
err = -EIO;
goto put_dev;
}
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
if (!priv) {
err = -ENOMEM;
goto put_dev;
}
if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) {
dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
@@ -185,6 +189,8 @@ static int __init amd_rng_mod_init(void)
release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
out:
kfree(priv);
put_dev:
pci_dev_put(pdev);
return err;
}
@@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void)
release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE);
pci_dev_put(priv->pcidev);
kfree(priv);
}
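
Both hw_random PCI fixes above and below (amd and geode) share one idea: scanning the bus with for_each_pci_dev() leaves a reference held on the matched pci_dev, so every early exit from the init function must drop it, which the patches do by funnelling failures through a put_dev label. A rough standalone sketch of that unwind shape, with hypothetical get_ref()/put_ref()/read_config() helpers standing in for the PCI calls:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for pci_get_device()/pci_dev_put() and
 * pci_read_config_dword(); only the unwind shape matters here. */
static void *get_ref(void)                      { return malloc(1); }
static void put_ref(void *dev)                  { free(dev); }
static int read_config(void *dev, unsigned *v)  { (void)dev; *v = 0xB000; return 0; }

static int demo_init(void)
{
	void *dev, *priv;
	unsigned pmbase;
	int err;

	dev = get_ref();                 /* reference held once the device is found */
	if (!dev)
		return -ENODEV;

	err = read_config(dev, &pmbase);
	if (err)
		goto put_dev;            /* was "return err;", which leaked the reference */

	pmbase &= 0x0000FF00;
	if (pmbase == 0) {
		err = -EIO;
		goto put_dev;
	}

	priv = malloc(64);
	if (!priv) {
		err = -ENOMEM;
		goto put_dev;
	}

	/* success: the real driver keeps dev and priv until module exit;
	 * this standalone demo simply tears them down again */
	printf("probe ok, pmbase=0x%x\n", pmbase);
	free(priv);
	put_ref(dev);
	return 0;

put_dev:
	put_ref(dev);                    /* every failure path now drops the reference */
	return err;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}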


@@ -225,7 +225,6 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev,
return -ENOMEM;
rng->ops.read = cavium_rng_read;
rng->ops.quality = 1000;
pci_set_drvdata(pdev, rng);


@@ -145,7 +145,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
rng->ops.read = cn10k_rng_read;
rng->ops.quality = 1000;
rng->ops.priv = (unsigned long)rng;
reset_rng_health_state(rng);


@@ -41,14 +41,14 @@ static DEFINE_MUTEX(reading_mutex);
static int data_avail;
static u8 *rng_buffer, *rng_fillbuf;
static unsigned short current_quality;
static unsigned short default_quality; /* = 0; default to "off" */
static unsigned short default_quality = 1024; /* default to maximum */
module_param(current_quality, ushort, 0644);
MODULE_PARM_DESC(current_quality,
"current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
module_param(default_quality, ushort, 0644);
MODULE_PARM_DESC(default_quality,
"default entropy content of hwrng per 1024 bits of input");
"default maximum entropy content of hwrng per 1024 bits of input");
static void drop_current_rng(void);
static int hwrng_init(struct hwrng *rng);
@@ -172,10 +172,7 @@ static int hwrng_init(struct hwrng *rng)
reinit_completion(&rng->cleanup_done);
skip_init:
if (!rng->quality)
rng->quality = default_quality;
if (rng->quality > 1024)
rng->quality = 1024;
rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024);
current_quality = rng->quality; /* obsolete */
return 0;
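
Taken together, the two core hunks above flip the default so that an RNG with no driver-supplied estimate is credited with full entropy (1024/1024), while still honouring a lower driver value or a lower default_quality module parameter, and capping everything at 1024; that is also why several drivers in this series simply delete their explicit ".quality = 1000/1024" assignments. A standalone sketch of the resulting selection, written with plain C comparisons instead of min_t():

#include <stdio.h>

/* Mirrors: min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024) */
static unsigned short pick_quality(unsigned short driver_quality,
				   unsigned short default_quality)
{
	unsigned short cap = default_quality < 1024 ? default_quality : 1024;
	unsigned short drv = driver_quality ? driver_quality : 1024;

	return cap < drv ? cap : drv;
}

int main(void)
{
	printf("%u\n", pick_quality(0, 1024));    /* no driver estimate -> 1024 (new default) */
	printf("%u\n", pick_quality(700, 1024));  /* driver says 700    -> 700 */
	printf("%u\n", pick_quality(1000, 512));  /* module param lowers the cap -> 512 */
	return 0;
}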


@@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, pci_tbl);
struct amd_geode_priv {
struct pci_dev *pcidev;
void __iomem *membase;
};
static int geode_rng_data_read(struct hwrng *rng, u32 *data)
{
@@ -90,6 +94,7 @@ static int __init geode_rng_init(void)
const struct pci_device_id *ent;
void __iomem *mem;
unsigned long rng_base;
struct amd_geode_priv *priv;
for_each_pci_dev(pdev) {
ent = pci_match_id(pci_tbl, pdev);
@@ -97,17 +102,26 @@ static int __init geode_rng_init(void)
goto found;
}
/* Device not found. */
goto out;
return err;
found:
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
err = -ENOMEM;
goto put_dev;
}
rng_base = pci_resource_start(pdev, 0);
if (rng_base == 0)
goto out;
goto free_priv;
err = -ENOMEM;
mem = ioremap(rng_base, 0x58);
if (!mem)
goto out;
geode_rng.priv = (unsigned long)mem;
goto free_priv;
geode_rng.priv = (unsigned long)priv;
priv->membase = mem;
priv->pcidev = pdev;
pr_info("AMD Geode RNG detected\n");
err = hwrng_register(&geode_rng);
@@ -116,20 +130,26 @@ static int __init geode_rng_init(void)
err);
goto err_unmap;
}
out:
return err;
err_unmap:
iounmap(mem);
goto out;
free_priv:
kfree(priv);
put_dev:
pci_dev_put(pdev);
return err;
}
static void __exit geode_rng_exit(void)
{
void __iomem *mem = (void __iomem *)geode_rng.priv;
struct amd_geode_priv *priv;
priv = (struct amd_geode_priv *)geode_rng.priv;
hwrng_unregister(&geode_rng);
iounmap(mem);
iounmap(priv->membase);
pci_dev_put(priv->pcidev);
kfree(priv);
}
module_init(geode_rng_init);
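
The geode variant of the same refcount fix additionally has to remember both the register mapping and the pci_dev for the exit path, so it introduces a small priv struct and parks its pointer in the hwrng ->priv field (an unsigned long handle). A trivial sketch of that handle round-trip, with invented struct and field contents standing in for the real resources:

#include <stdio.h>
#include <stdlib.h>

struct demo_priv {                    /* like struct amd_geode_priv */
	const char *membase;
	const char *pcidev;
};

struct demo_rng {                     /* like struct hwrng */
	unsigned long priv;
};

int main(void)
{
	struct demo_rng rng;
	struct demo_priv *priv = calloc(1, sizeof(*priv));

	if (!priv)
		return 1;

	/* init path: stash everything the exit path will need */
	priv->membase = "mapped registers";
	priv->pcidev  = "pci device";
	rng.priv = (unsigned long)priv;

	/* exit path: recover the struct from the handle and release resources */
	struct demo_priv *p = (struct demo_priv *)rng.priv;
	printf("unmapping %s, putting %s\n", p->membase, p->pcidev);
	free(p);
	return 0;
}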


@@ -78,7 +78,6 @@ static int mpfs_rng_probe(struct platform_device *pdev)
rng_priv->rng.read = mpfs_rng_read;
rng_priv->rng.name = pdev->name;
rng_priv->rng.quality = 1024;
platform_set_drvdata(pdev, rng_priv);


@@ -22,7 +22,7 @@
#define RNG_AUTOSUSPEND_TIMEOUT 100
#define USEC_POLL 2
#define TIMEOUT_POLL 20
#define TIMEOUT_POLL 60
#define RNG_CTRL 0x00
#define RNG_EN BIT(0)
@@ -77,7 +77,7 @@ static bool mtk_rng_wait_ready(struct hwrng *rng, bool wait)
readl_poll_timeout_atomic(priv->base + RNG_CTRL, ready,
ready & RNG_READY, USEC_POLL,
TIMEOUT_POLL);
return !!ready;
return !!(ready & RNG_READY);
}
static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -179,6 +179,7 @@ static const struct dev_pm_ops mtk_rng_pm_ops = {
#endif /* CONFIG_PM */
static const struct of_device_id mtk_rng_match[] = {
{ .compatible = "mediatek,mt7986-rng" },
{ .compatible = "mediatek,mt7623-rng" },
{},
};
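
The one-line fix in mtk_rng_wait_ready() above matters because readl_poll_timeout_atomic() leaves the last raw register value in ready; on timeout that value can be non-zero (for example, the enable bit) even though the READY bit never rose, so only the bit itself may be tested. A tiny standalone illustration, with an assumed RNG_READY bit position and a fake register read:

#include <stdio.h>

#define RNG_EN    (1u << 0)
#define RNG_READY (1u << 31)           /* assumed bit position for this sketch */

/* pretend read of RNG_CTRL: generator enabled, but no data ready yet */
static unsigned int read_ctrl(void)
{
	return RNG_EN;
}

int main(void)
{
	unsigned int ready = read_ctrl();  /* what the poll loop leaves behind on timeout */

	printf("old check !!ready             -> %d (falsely 'ready')\n", !!ready);
	printf("new check !!(ready&RNG_READY) -> %d (correct)\n", !!(ready & RNG_READY));
	return 0;
}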


@@ -13,11 +13,13 @@
#include <linux/delay.h>
#include <linux/of_irq.h>
#include <linux/pm_runtime.h>
#include <linux/of_device.h>
#define NPCM_RNGCS_REG 0x00 /* Control and status register */
#define NPCM_RNGD_REG 0x04 /* Data register */
#define NPCM_RNGMODE_REG 0x08 /* Mode register */
#define NPCM_RNG_CLK_SET_62_5MHZ BIT(2) /* 60-80 MHz */
#define NPCM_RNG_CLK_SET_25MHZ GENMASK(4, 3) /* 20-25 MHz */
#define NPCM_RNG_DATA_VALID BIT(1)
#define NPCM_RNG_ENABLE BIT(0)
@@ -31,14 +33,14 @@
struct npcm_rng {
void __iomem *base;
struct hwrng rng;
u32 clkp;
};
static int npcm_rng_init(struct hwrng *rng)
{
struct npcm_rng *priv = to_npcm_rng(rng);
writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE,
priv->base + NPCM_RNGCS_REG);
writel(priv->clkp | NPCM_RNG_ENABLE, priv->base + NPCM_RNGCS_REG);
return 0;
}
@@ -47,7 +49,7 @@ static void npcm_rng_cleanup(struct hwrng *rng)
{
struct npcm_rng *priv = to_npcm_rng(rng);
writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG);
writel(priv->clkp, priv->base + NPCM_RNGCS_REG);
}
static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -109,7 +111,7 @@ static int npcm_rng_probe(struct platform_device *pdev)
priv->rng.name = pdev->name;
priv->rng.read = npcm_rng_read;
priv->rng.priv = (unsigned long)&pdev->dev;
priv->rng.quality = 1000;
priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev);
writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
@@ -162,7 +164,10 @@ static const struct dev_pm_ops npcm_rng_pm_ops = {
};
static const struct of_device_id rng_dt_id[] __maybe_unused = {
{ .compatible = "nuvoton,npcm750-rng", },
{ .compatible = "nuvoton,npcm750-rng",
.data = (void *)NPCM_RNG_CLK_SET_25MHZ },
{ .compatible = "nuvoton,npcm845-rng",
.data = (void *)NPCM_RNG_CLK_SET_62_5MHZ },
{},
};
MODULE_DEVICE_TABLE(of, rng_dt_id);
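
The npcm changes stop hard-coding NPCM_RNG_CLK_SET_25MHZ and instead pull the prescaler from the matched compatible's .data (via of_device_get_match_data()), which is how the new npcm845 entry selects the 62.5 MHz setting. Outside the kernel the same idea is just a lookup table keyed by compatible string; a rough sketch using the bit values from the defines above:

#include <stdio.h>
#include <string.h>

#define NPCM_RNG_CLK_SET_62_5MHZ (1u << 2)                 /* BIT(2) */
#define NPCM_RNG_CLK_SET_25MHZ   ((1u << 3) | (1u << 4))   /* GENMASK(4, 3) */

struct match { const char *compatible; unsigned int clkp; };

static const struct match rng_dt_id[] = {
	{ "nuvoton,npcm750-rng", NPCM_RNG_CLK_SET_25MHZ },
	{ "nuvoton,npcm845-rng", NPCM_RNG_CLK_SET_62_5MHZ },
};

/* stand-in for of_device_get_match_data(): map the firmware-provided
 * compatible string to per-SoC configuration data */
static unsigned int clkp_for(const char *compatible)
{
	for (size_t i = 0; i < sizeof(rng_dt_id) / sizeof(rng_dt_id[0]); i++)
		if (!strcmp(rng_dt_id[i].compatible, compatible))
			return rng_dt_id[i].clkp;
	return NPCM_RNG_CLK_SET_25MHZ;   /* arbitrary fallback for the sketch */
}

int main(void)
{
	printf("npcm845 clkp = 0x%x\n", clkp_for("nuvoton,npcm845-rng"));
	return 0;
}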


@@ -191,7 +191,6 @@ static struct hwrng trng_hwrng_dev = {
.name = "s390-trng",
.data_read = trng_hwrng_data_read,
.read = trng_hwrng_read,
.quality = 1024,
};

Some files were not shown because too many files have changed in this diff.