crypto: arm/aes-neonbs - go back to using aes-arm directly

In aes-neonbs, instead of going through the crypto API for the parts
that the bit-sliced AES code doesn't handle, namely AES-CBC encryption
and single-block AES, just call the ARM scalar AES cipher directly.

This basically goes back to the original approach that was used before
commit b56f5cbc7e ("crypto: arm/aes-neonbs - resolve fallback cipher
at runtime").  Calling the ARM scalar AES cipher directly is faster,
simpler, and avoids any chance of bugs specific to the use of fallback
ciphers such as module loading deadlocks which have happened twice.  The
deadlocks turned out to be fixable in other ways, but there's no need to
rely on anything so fragile in the first place.

The rationale for the above-mentioned commit was to allow people to
choose to use a time-invariant AES implementation for the fallback
cipher.  There are a couple problems with that rationale, though:

- In practice the ARM scalar AES cipher (aes-arm) was used anyway, since
  it has a higher priority than aes-fixed-time.  Users *could* go out of
  their way to disable or blacklist aes-arm, or to lower its priority
  using NETLINK_CRYPTO, but very few users customize the crypto API to
  this extent.  Systems with the ARMv8 Crypto Extensions used aes-ce,
  but the bit-sliced algorithms are irrelevant on such systems anyway.

- Since commit 913a3aa07d ("crypto: arm/aes - add some hardening
  against cache-timing attacks"), the ARM scalar AES cipher is partially
  hardened against cache-timing attacks.  It actually works like
  aes-fixed-time, in that it disables interrupts and prefetches its
  lookup table.  It does use a larger table than aes-fixed-time, but
  even so, it is not clear that aes-fixed-time is meaningfully more
  time-invariant than aes-arm.  And of course, the real solution for
  time-invariant AES is to use a CPU that supports AES instructions.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Eric Biggers 2024-08-09 16:11:49 -07:00 committed by Herbert Xu
parent 9a22b28123
commit f235bc11cc
4 changed files with 66 additions and 95 deletions

View File

@ -166,10 +166,9 @@ config CRYPTO_AES_ARM
config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_BS
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)" tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
depends on KERNEL_MODE_NEON depends on KERNEL_MODE_NEON
select CRYPTO_AES_ARM
select CRYPTO_SKCIPHER select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES select CRYPTO_LIB_AES
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_SIMD select CRYPTO_SIMD
help help
Length-preserving ciphers: AES cipher algorithms (FIPS-197) Length-preserving ciphers: AES cipher algorithms (FIPS-197)
@ -183,8 +182,15 @@ config CRYPTO_AES_ARM_BS
Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
and for XTS mode encryption, CBC and XTS mode decryption speedup is and for XTS mode encryption, CBC and XTS mode decryption speedup is
around 25%. (CBC encryption speed is not affected by this driver.) around 25%. (CBC encryption speed is not affected by this driver.)
This implementation does not rely on any lookup tables so it is
believed to be invulnerable to cache timing attacks. The bit sliced AES code does not use lookup tables, so it is believed
to be invulnerable to cache timing attacks. However, since the bit
sliced AES code cannot process single blocks efficiently, in certain
cases table-based code with some countermeasures against cache timing
attacks will still be used as a fallback method; specifically CBC
encryption (not CBC decryption), the encryption of XTS tweaks, XTS
ciphertext stealing when the message isn't a multiple of 16 bytes, and
CTR when invoked in a context in which NEON instructions are unusable.
config CRYPTO_AES_ARM_CE config CRYPTO_AES_ARM_CE
tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)" tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)"

View File

@ -9,9 +9,10 @@
#include <crypto/aes.h> #include <crypto/aes.h>
#include <crypto/algapi.h> #include <crypto/algapi.h>
#include <linux/module.h> #include <linux/module.h>
#include "aes-cipher.h"
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); EXPORT_SYMBOL_GPL(__aes_arm_encrypt);
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out); EXPORT_SYMBOL_GPL(__aes_arm_decrypt);
static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{ {

View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef ARM_CRYPTO_AES_CIPHER_H
#define ARM_CRYPTO_AES_CIPHER_H
#include <linux/linkage.h>
#include <linux/types.h>
asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds,
const u8 *in, u8 *out);
asmlinkage void __aes_arm_decrypt(const u32 rk[], int rounds,
const u8 *in, u8 *out);
#endif /* ARM_CRYPTO_AES_CIPHER_H */

View File

@ -9,24 +9,22 @@
#include <asm/simd.h> #include <asm/simd.h>
#include <crypto/aes.h> #include <crypto/aes.h>
#include <crypto/ctr.h> #include <crypto/ctr.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/simd.h> #include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h> #include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h> #include <crypto/scatterwalk.h>
#include <crypto/xts.h> #include <crypto/xts.h>
#include <linux/module.h> #include <linux/module.h>
#include "aes-cipher.h"
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_DESCRIPTION("Bit sliced AES using NEON instructions"); MODULE_DESCRIPTION("Bit sliced AES using NEON instructions");
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
@ -52,13 +50,13 @@ struct aesbs_ctx {
struct aesbs_cbc_ctx { struct aesbs_cbc_ctx {
struct aesbs_ctx key; struct aesbs_ctx key;
struct crypto_skcipher *enc_tfm; struct crypto_aes_ctx fallback;
}; };
struct aesbs_xts_ctx { struct aesbs_xts_ctx {
struct aesbs_ctx key; struct aesbs_ctx key;
struct crypto_cipher *cts_tfm; struct crypto_aes_ctx fallback;
struct crypto_cipher *tweak_tfm; struct crypto_aes_ctx tweak_key;
}; };
struct aesbs_ctr_ctx { struct aesbs_ctr_ctx {
@ -129,37 +127,49 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len) unsigned int key_len)
{ {
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
struct crypto_aes_ctx rk;
int err; int err;
err = aes_expandkey(&rk, in_key, key_len); err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err) if (err)
return err; return err;
ctx->key.rounds = 6 + key_len / 4; ctx->key.rounds = 6 + key_len / 4;
kernel_neon_begin(); kernel_neon_begin();
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
kernel_neon_end(); kernel_neon_end();
memzero_explicit(&rk, sizeof(rk));
return crypto_skcipher_setkey(ctx->enc_tfm, in_key, key_len); return 0;
} }
static int cbc_encrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req)
{ {
struct skcipher_request *subreq = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); const struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
skcipher_request_set_tfm(subreq, ctx->enc_tfm); err = skcipher_walk_virt(&walk, req, false);
skcipher_request_set_callback(subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst,
req->cryptlen, req->iv);
return crypto_skcipher_encrypt(subreq); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
u8 *prev = walk.iv;
do {
crypto_xor_cpy(dst, src, prev, AES_BLOCK_SIZE);
__aes_arm_encrypt(ctx->fallback.key_enc,
ctx->key.rounds, dst, dst);
prev = dst;
src += AES_BLOCK_SIZE;
dst += AES_BLOCK_SIZE;
nbytes -= AES_BLOCK_SIZE;
} while (nbytes >= AES_BLOCK_SIZE);
memcpy(walk.iv, prev, AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, nbytes);
}
return err;
} }
static int cbc_decrypt(struct skcipher_request *req) static int cbc_decrypt(struct skcipher_request *req)
@ -190,30 +200,6 @@ static int cbc_decrypt(struct skcipher_request *req)
return err; return err;
} }
static int cbc_init(struct crypto_skcipher *tfm)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned int reqsize;
ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->enc_tfm))
return PTR_ERR(ctx->enc_tfm);
reqsize = sizeof(struct skcipher_request);
reqsize += crypto_skcipher_reqsize(ctx->enc_tfm);
crypto_skcipher_set_reqsize(tfm, reqsize);
return 0;
}
static void cbc_exit(struct crypto_skcipher *tfm)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
crypto_free_skcipher(ctx->enc_tfm);
}
static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len) unsigned int key_len)
{ {
@ -271,16 +257,8 @@ static int ctr_encrypt(struct skcipher_request *req)
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{ {
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned long flags;
/* __aes_arm_encrypt(ctx->fallback.key_enc, ctx->key.rounds, src, dst);
* Temporarily disable interrupts to avoid races where
* cachelines are evicted when the CPU is interrupted
* to do something else.
*/
local_irq_save(flags);
aes_encrypt(&ctx->fallback, dst, src);
local_irq_restore(flags);
} }
static int ctr_encrypt_sync(struct skcipher_request *req) static int ctr_encrypt_sync(struct skcipher_request *req)
@ -302,45 +280,23 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err; return err;
key_len /= 2; key_len /= 2;
err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len); err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err) if (err)
return err; return err;
err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len); err = aes_expandkey(&ctx->tweak_key, in_key + key_len, key_len);
if (err) if (err)
return err; return err;
return aesbs_setkey(tfm, in_key, key_len); return aesbs_setkey(tfm, in_key, key_len);
} }
static int xts_init(struct crypto_skcipher *tfm)
{
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->cts_tfm))
return PTR_ERR(ctx->cts_tfm);
ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->tweak_tfm))
crypto_free_cipher(ctx->cts_tfm);
return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
}
static void xts_exit(struct crypto_skcipher *tfm)
{
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
crypto_free_cipher(ctx->tweak_tfm);
crypto_free_cipher(ctx->cts_tfm);
}
static int __xts_crypt(struct skcipher_request *req, bool encrypt, static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[], void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[], int)) int rounds, int blocks, u8 iv[], int))
{ {
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
const int rounds = ctx->key.rounds;
int tail = req->cryptlen % AES_BLOCK_SIZE; int tail = req->cryptlen % AES_BLOCK_SIZE;
struct skcipher_request subreq; struct skcipher_request subreq;
u8 buf[2 * AES_BLOCK_SIZE]; u8 buf[2 * AES_BLOCK_SIZE];
@ -364,7 +320,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
if (err) if (err)
return err; return err;
crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv); __aes_arm_encrypt(ctx->tweak_key.key_enc, rounds, walk.iv, walk.iv);
while (walk.nbytes >= AES_BLOCK_SIZE) { while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@ -378,7 +334,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
kernel_neon_begin(); kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
ctx->key.rounds, blocks, walk.iv, reorder_last_tweak); rounds, blocks, walk.iv, reorder_last_tweak);
kernel_neon_end(); kernel_neon_end();
err = skcipher_walk_done(&walk, err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE); walk.nbytes - blocks * AES_BLOCK_SIZE);
@ -396,9 +352,9 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
crypto_xor(buf, req->iv, AES_BLOCK_SIZE); crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
if (encrypt) if (encrypt)
crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf); __aes_arm_encrypt(ctx->fallback.key_enc, rounds, buf, buf);
else else
crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf); __aes_arm_decrypt(ctx->fallback.key_dec, rounds, buf, buf);
crypto_xor(buf, req->iv, AES_BLOCK_SIZE); crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
@ -439,8 +395,7 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE, .base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL | .base.cra_flags = CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_NEED_FALLBACK,
.min_keysize = AES_MIN_KEY_SIZE, .min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE,
@ -449,8 +404,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_cbc_setkey, .setkey = aesbs_cbc_setkey,
.encrypt = cbc_encrypt, .encrypt = cbc_encrypt,
.decrypt = cbc_decrypt, .decrypt = cbc_decrypt,
.init = cbc_init,
.exit = cbc_exit,
}, { }, {
.base.cra_name = "__ctr(aes)", .base.cra_name = "__ctr(aes)",
.base.cra_driver_name = "__ctr-aes-neonbs", .base.cra_driver_name = "__ctr-aes-neonbs",
@ -500,8 +453,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_xts_setkey, .setkey = aesbs_xts_setkey,
.encrypt = xts_encrypt, .encrypt = xts_encrypt,
.decrypt = xts_decrypt, .decrypt = xts_decrypt,
.init = xts_init,
.exit = xts_exit,
} }; } };
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];