2019-11-08 13:22:24 +01:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
|
|
|
|
*
|
|
|
|
* Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <asm/hwcap.h>
|
|
|
|
#include <asm/neon.h>
|
|
|
|
#include <asm/simd.h>
|
2024-10-01 15:35:57 -04:00
|
|
|
#include <linux/unaligned.h>
|
2019-11-08 13:22:24 +01:00
|
|
|
#include <crypto/algapi.h>
|
|
|
|
#include <crypto/internal/hash.h>
|
|
|
|
#include <crypto/internal/poly1305.h>
|
|
|
|
#include <crypto/internal/simd.h>
|
|
|
|
#include <linux/cpufeature.h>
|
|
|
|
#include <linux/crypto.h>
|
|
|
|
#include <linux/jump_label.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
|
|
|
|
/*
 * Key setup routine, declared asmlinkage and defined outside this file.
 * Callers in this file pass &dctx->h as @state and either the full key or
 * the first 16-byte block of the message as @key.
 * NOTE(review): presumably only the first 16 bytes (the "r" half) are read;
 * confirm against the assembly source.
 */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
|
|
|
|
/*
 * Block-processing routine used when NEON is not selected; declared
 * asmlinkage and defined outside this file. Callers in this file pass
 * &dctx->h as @state, a @len that is a multiple of POLY1305_BLOCK_SIZE, and
 * @hibit = 1 for ordinary message blocks or 0 for the explicitly padded
 * final block.
 */
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
|
|
|
|
/*
 * NEON counterpart of poly1305_blocks() with the same argument list; declared
 * asmlinkage and defined outside this file. In this file it is only called
 * when the have_neon static key is enabled, and the library path brackets
 * each call with kernel_neon_begin()/kernel_neon_end().
 */
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
|
2020-01-05 22:40:49 -05:00
|
|
|
/*
 * Finalisation routine, declared asmlinkage and defined outside this file.
 * Called with &dctx->h as @state, the caller's output buffer as @digest and
 * dctx->s (four 32-bit words taken from the second half of the key) as
 * @nonce.
 */
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
|
2019-11-08 13:22:24 +01:00
|
|
|
|
|
|
|
/*
 * Static key that defaults to false and is enabled by
 * neon_poly1305_mod_init() when the CPU reports the ASIMD feature. Every
 * NEON code path in this file is gated on it.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
|
|
|
|
2021-03-22 18:05:15 +01:00
|
|
|
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
|
2019-11-08 13:22:24 +01:00
|
|
|
{
|
|
|
|
poly1305_init_arm64(&dctx->h, key);
|
|
|
|
dctx->s[0] = get_unaligned_le32(key + 16);
|
|
|
|
dctx->s[1] = get_unaligned_le32(key + 20);
|
|
|
|
dctx->s[2] = get_unaligned_le32(key + 24);
|
|
|
|
dctx->s[3] = get_unaligned_le32(key + 28);
|
|
|
|
dctx->buflen = 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(poly1305_init_arch);
|
|
|
|
|
|
|
|
static int neon_poly1305_init(struct shash_desc *desc)
|
|
|
|
{
|
|
|
|
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
|
|
|
|
|
|
|
dctx->buflen = 0;
|
|
|
|
dctx->rset = 0;
|
|
|
|
dctx->sset = false;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Feed whole blocks to the block routines, first consuming key material from
 * the input if the key has not been fully set yet.
 *
 * @dctx:    descriptor context
 * @src:     input data
 * @len:     number of input bytes; callers in this file always pass at least
 *           POLY1305_BLOCK_SIZE
 * @hibit:   passed through unchanged to the block routine
 * @do_neon: caller's request to use the NEON routine (only honoured when the
 *           have_neon static key is enabled)
 *
 * While dctx->sset is false, the first POLY1305_BLOCK_SIZE bytes seen
 * initialise the state via poly1305_init_arm64() and the following
 * POLY1305_BLOCK_SIZE bytes are loaded into dctx->s[]. Any trailing partial
 * block in @len is ignored here; the caller is responsible for buffering it.
 */
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/*
			 * Only one block is guaranteed to be present, so use
			 * poly1305_init_arm64() rather than
			 * poly1305_init_arch(): the latter also reads the
			 * following 16 bytes for s[], which would run past
			 * the end of a 16-byte input. s[] is loaded
			 * separately below once a second block is available.
			 */
			poly1305_init_arm64(&dctx->h, src);
			dctx->rset = 1;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		if (len >= POLY1305_BLOCK_SIZE) {
			unsigned int i;

			for (i = 0; i < 4; i++)
				dctx->s[i] = get_unaligned_le32(src + 4 * i);

			dctx->sset = true;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Nothing but key material (and maybe a partial block) left. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Drop any trailing partial block. */
	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (!static_branch_likely(&have_neon) || unlikely(!do_neon))
		poly1305_blocks(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
}
|
|
|
|
|
|
|
|
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
|
|
|
|
const u8 *src, u32 len, bool do_neon)
|
|
|
|
{
|
|
|
|
if (unlikely(dctx->buflen)) {
|
|
|
|
u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
|
|
|
|
|
|
|
|
memcpy(dctx->buf + dctx->buflen, src, bytes);
|
|
|
|
src += bytes;
|
|
|
|
len -= bytes;
|
|
|
|
dctx->buflen += bytes;
|
|
|
|
|
|
|
|
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
|
|
|
neon_poly1305_blocks(dctx, dctx->buf,
|
|
|
|
POLY1305_BLOCK_SIZE, 1, false);
|
|
|
|
dctx->buflen = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (likely(len >= POLY1305_BLOCK_SIZE)) {
|
|
|
|
neon_poly1305_blocks(dctx, src, len, 1, do_neon);
|
|
|
|
src += round_down(len, POLY1305_BLOCK_SIZE);
|
|
|
|
len %= POLY1305_BLOCK_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(len)) {
|
|
|
|
dctx->buflen = len;
|
|
|
|
memcpy(dctx->buf, src, len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int neon_poly1305_update(struct shash_desc *desc,
|
|
|
|
const u8 *src, unsigned int srclen)
|
|
|
|
{
|
|
|
|
bool do_neon = crypto_simd_usable() && srclen > 128;
|
|
|
|
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
|
|
|
|
|
|
|
if (static_branch_likely(&have_neon) && do_neon)
|
|
|
|
kernel_neon_begin();
|
|
|
|
neon_poly1305_do_update(dctx, src, srclen, do_neon);
|
|
|
|
if (static_branch_likely(&have_neon) && do_neon)
|
|
|
|
kernel_neon_end();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
|
|
|
|
unsigned int nbytes)
|
|
|
|
{
|
|
|
|
if (unlikely(dctx->buflen)) {
|
|
|
|
u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
|
|
|
|
|
|
|
|
memcpy(dctx->buf + dctx->buflen, src, bytes);
|
|
|
|
src += bytes;
|
|
|
|
nbytes -= bytes;
|
|
|
|
dctx->buflen += bytes;
|
|
|
|
|
|
|
|
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
|
|
|
poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
|
|
|
|
dctx->buflen = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
|
|
|
|
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
|
|
|
|
|
|
|
|
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
|
crypto: arch/lib - limit simd usage to 4k chunks
The initial Zinc patchset, after some mailing list discussion, contained
code to ensure that kernel_fpu_enable would not be kept on for more than
a 4k chunk, since it disables preemption. The choice of 4k isn't totally
scientific, but it's not a bad guess either, and it's what's used in
both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
former two).
Ard did some back of the envelope calculations and found that
at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k
means we have a maximum preemption disabling of 20us, which Sebastian
confirmed was probably a good limit.
Unfortunately the chunking appears to have been left out of the final
patchset that added the glue code. So, this commit adds it back in.
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
Cc: Eric Biggers <ebiggers@google.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-04-22 17:18:53 -06:00
|
|
|
do {
|
|
|
|
unsigned int todo = min_t(unsigned int, len, SZ_4K);
|
|
|
|
|
|
|
|
kernel_neon_begin();
|
|
|
|
poly1305_blocks_neon(&dctx->h, src, todo, 1);
|
|
|
|
kernel_neon_end();
|
|
|
|
|
|
|
|
len -= todo;
|
|
|
|
src += todo;
|
|
|
|
} while (len);
|
2019-11-08 13:22:24 +01:00
|
|
|
} else {
|
|
|
|
poly1305_blocks(&dctx->h, src, len, 1);
|
crypto: arch/lib - limit simd usage to 4k chunks
The initial Zinc patchset, after some mailing list discussion, contained
code to ensure that kernel_fpu_enable would not be kept on for more than
a 4k chunk, since it disables preemption. The choice of 4k isn't totally
scientific, but it's not a bad guess either, and it's what's used in
both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
former two).
Ard did some back of the envelope calculations and found that
at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k
means we have a maximum preemption disabling of 20us, which Sebastian
confirmed was probably a good limit.
Unfortunately the chunking appears to have been left out of the final
patchset that added the glue code. So, this commit adds it back in.
Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
Cc: Eric Biggers <ebiggers@google.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-04-22 17:18:53 -06:00
|
|
|
src += len;
|
2019-11-08 13:22:24 +01:00
|
|
|
}
|
|
|
|
nbytes %= POLY1305_BLOCK_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(nbytes)) {
|
|
|
|
dctx->buflen = nbytes;
|
|
|
|
memcpy(dctx->buf, src, nbytes);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(poly1305_update_arch);
|
|
|
|
|
|
|
|
/*
 * Finish the computation and write the result to @dst.
 *
 * If a partial block is buffered, a single 1 byte is appended, the rest of
 * the block is zero-filled and the block is processed with hibit == 0.
 * poly1305_emit() then produces the output using dctx->s[], and the whole
 * context is wiped with memzero_explicit() so no key-dependent state is left
 * behind.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		u8 *pad = dctx->buf + dctx->buflen;

		*pad++ = 1;
		dctx->buflen++;
		memset(pad, 0, POLY1305_BLOCK_SIZE - dctx->buflen);

		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, dst, dctx->s);
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);
|
|
|
|
|
|
|
|
/*
 * shash ->final() callback.
 *
 * Returns -ENOKEY if the s[] half of the key was never loaded from the input
 * (dctx->sset is still false); otherwise finalises via poly1305_final_arch()
 * and returns 0.
 */
static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);

	if (likely(ctx->sset)) {
		poly1305_final_arch(ctx, dst);
		return 0;
	}

	return -ENOKEY;
}
|
|
|
|
|
|
|
|
static struct shash_alg neon_poly1305_alg = {
|
|
|
|
.init = neon_poly1305_init,
|
|
|
|
.update = neon_poly1305_update,
|
|
|
|
.final = neon_poly1305_final,
|
|
|
|
.digestsize = POLY1305_DIGEST_SIZE,
|
|
|
|
.descsize = sizeof(struct poly1305_desc_ctx),
|
|
|
|
|
|
|
|
.base.cra_name = "poly1305",
|
|
|
|
.base.cra_driver_name = "poly1305-neon",
|
|
|
|
.base.cra_priority = 200,
|
|
|
|
.base.cra_blocksize = POLY1305_BLOCK_SIZE,
|
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Module init.
 *
 * Without ASIMD nothing is enabled or registered and the module still loads
 * successfully. With ASIMD the have_neon static key is switched on, and the
 * shash algorithm is registered only when CONFIG_CRYPTO_HASH is reachable
 * from this code.
 *
 * Returns 0, or the result of crypto_register_shash().
 */
static int __init neon_poly1305_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return 0;

	static_branch_enable(&have_neon);

	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return 0;

	return crypto_register_shash(&neon_poly1305_alg);
}
|
|
|
|
|
|
|
|
/*
 * Module exit: unregister the shash algorithm under exactly the conditions
 * in which neon_poly1305_mod_init() registered it (CONFIG_CRYPTO_HASH
 * reachable and ASIMD present).
 */
static void __exit neon_poly1305_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return;

	if (!cpu_have_named_feature(ASIMD))
		return;

	crypto_unregister_shash(&neon_poly1305_alg);
}
|
|
|
|
|
|
|
|
/* Run neon_poly1305_mod_init() when the module is loaded. */
module_init(neon_poly1305_mod_init);
|
|
|
|
/* Run neon_poly1305_mod_exit() when the module is unloaded. */
module_exit(neon_poly1305_mod_exit);
|
|
|
|
|
2024-06-12 13:11:57 -07:00
|
|
|
/* Human-readable module description. */
MODULE_DESCRIPTION("Poly1305 transform using NEON instructions");
|
2019-11-08 13:22:24 +01:00
|
|
|
/* Module license tag; corresponds to the GPL-2.0 SPDX identifier at the top of the file. */
MODULE_LICENSE("GPL v2");
|
|
|
|
/* Module alias for the generic algorithm name (same string as .base.cra_name). */
MODULE_ALIAS_CRYPTO("poly1305");
|
|
|
|
/* Module alias for the driver name (same string as .base.cra_driver_name). */
MODULE_ALIAS_CRYPTO("poly1305-neon");
|