crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input

CCM combines the counter (CTR) encryption mode with a MAC based on the
same block cipher. This MAC construction is a bit clunky: it invokes the
block cipher in a way that cannot be parallelized, resulting in poor CPU
pipeline efficiency.

The arm64 CCM code mitigates this by interleaving the encryption and MAC
at the AES round level, resulting in a substantial speedup. But this
approach does not apply to the additional authenticated data (AAD) which
is not encrypted.

This means the special asm routine dealing with the AAD is not any
better than the MAC update routine used by the arm64 AES block
encryption driver, so let's reuse that, and drop the special AES-CCM
version.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2024-01-18 18:06:34 +01:00 committed by Herbert Xu
parent c131098d6d
commit 948ffc66e5
4 changed files with 43 additions and 79 deletions

View File

@ -268,6 +268,7 @@ config CRYPTO_AES_ARM64_CE_CCM
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64_CE_BLK
select CRYPTO_AEAD
select CRYPTO_LIB_AES
help

View File

@ -14,77 +14,6 @@
.text
.arch armv8-a+crypto
/*
* u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
* u32 macp, u8 const rk[], u32 rounds);
*/
SYM_FUNC_START(ce_aes_ccm_auth_data)
ld1 {v0.16b}, [x0] /* load mac */
cbz w3, 1f
sub w3, w3, #16
eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1
add w3, w3, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
cbnz w3, 0b
eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.4s}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
sub w7, w5, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
ld1 {v5.4s}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
4: ld1 {v3.4s}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
5: ld1 {v4.4s}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
6: st1 {v0.16b}, [x0] /* store mac */
beq 10f
adds w2, w2, #16
beq 10f
mov w3, w2
7: ldrb w7, [x1], #1
umov w6, v0.b[0]
eor w6, w6, w7
strb w6, [x0], #1
subs w2, w2, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
8: cbz w3, 91f
mov w7, w3
add w3, w3, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
91: eor v0.16b, v0.16b, v1.16b
st1 {v0.16b}, [x0]
10: mov w0, w3
ret
SYM_FUNC_END(ce_aes_ccm_auth_data)
/*
* void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
* u32 rounds);

View File

@ -18,6 +18,8 @@
#include "aes-ce-setkey.h"
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
@ -30,8 +32,9 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
return 6 + ctx->key_length / 4;
}
asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
u32 macp, u32 const rk[], u32 rounds);
asmlinkage u32 ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
u32 const rk[], u32 rounds, u8 mac[],
@ -97,6 +100,41 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
return 0;
}
static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
u32 macp, u32 const rk[], u32 rounds)
{
int enc_after = (macp + abytes) % AES_BLOCK_SIZE;
do {
u32 blocks = abytes / AES_BLOCK_SIZE;
if (macp == AES_BLOCK_SIZE || (!macp && blocks > 0)) {
u32 rem = ce_aes_mac_update(in, rk, rounds, blocks, mac,
macp, enc_after);
u32 adv = (blocks - rem) * AES_BLOCK_SIZE;
macp = enc_after ? 0 : AES_BLOCK_SIZE;
in += adv;
abytes -= adv;
if (unlikely(rem)) {
kernel_neon_end();
kernel_neon_begin();
macp = 0;
}
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
crypto_xor(&mac[macp], in, l);
in += l;
macp += l;
abytes -= l;
}
} while (abytes > 0);
return macp;
}
static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
@ -104,7 +142,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
struct __packed { __be16 l; __be32 h; u16 len; } ltag;
struct scatter_walk walk;
u32 len = req->assoclen;
u32 macp = 0;
u32 macp = AES_BLOCK_SIZE;
/* prepend the AAD with a length tag */
if (len < 0xff00) {
@ -128,16 +166,11 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
scatterwalk_start(&walk, sg_next(walk.sg));
n = scatterwalk_clamp(&walk, len);
}
n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */
p = scatterwalk_map(&walk);
macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc,
num_rounds(ctx));
if (len / SZ_4K > (len - n) / SZ_4K) {
kernel_neon_end();
kernel_neon_begin();
}
len -= n;
scatterwalk_unmap(p);

View File

@ -1048,6 +1048,7 @@ static int __init aes_init(void)
#ifdef USE_V8_CRYPTO_EXTENSIONS
module_cpu_feature_match(AES, aes_init);
EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL);
#else
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);