linux/lib/chacha20.c
Eric Biggers a5e9f55709 crypto: chacha20 - Fix chacha20_block() keystream alignment (again)
In commit 9f480faec58c ("crypto: chacha20 - Fix keystream alignment for
chacha20_block()"), I had missed that chacha20_block() can be called
directly on the buffer passed to get_random_bytes(), which can have any
alignment.  So, while my commit didn't break anything, it didn't fully
solve the alignment problems.

Revert my solution and just update chacha20_block() to use
put_unaligned_le32(), so the output buffer need not be aligned.
This is simpler, and on many CPUs it's the same speed.

But, I kept the 'tmp' buffers in extract_crng_user() and
_get_random_bytes() 4-byte aligned, since that alignment is actually
needed for _crng_backtrack_protect() too.

Reported-by: Stephan Müller <smueller@chronox.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-09-21 13:24:50 +08:00

75 lines
2.5 KiB
C

/*
* ChaCha20 256-bit cipher algorithm, RFC7539
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/cryptohash.h>
#include <asm/unaligned.h>
#include <crypto/chacha20.h>
void chacha20_block(u32 *state, u8 *stream)
{
u32 x[16];
int i;
for (i = 0; i < ARRAY_SIZE(x); i++)
x[i] = state[i];
for (i = 0; i < 20; i += 2) {
x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
}
for (i = 0; i < ARRAY_SIZE(x); i++)
put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
state[12]++;
}
EXPORT_SYMBOL(chacha20_block);