crypto: arm/blake2s - fix for big endian

The new ARM BLAKE2s code doesn't work correctly (fails the self-tests)
in big endian kernel builds because it doesn't swap the endianness of
the message words when loading them.  Fix this.

Fixes: 5172d322d3 ("crypto: arm/blake2s - add ARM scalar optimized BLAKE2s")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Eric Biggers 2021-03-09 23:27:26 -08:00 committed by Herbert Xu
parent 0914999744
commit d2f2516a38

View File

@ -8,6 +8,7 @@
*/ */
#include <linux/linkage.h> #include <linux/linkage.h>
#include <asm/assembler.h>
// Registers used to hold message words temporarily. There aren't // Registers used to hold message words temporarily. There aren't
// enough ARM registers to hold the whole message block, so we have to // enough ARM registers to hold the whole message block, so we have to
@ -38,6 +39,23 @@
#endif #endif
.endm .endm
// Bring the little-endian 32-bit word held in register \a into CPU byte
// order.
//
//   a:   register holding the word; updated in place.
//   tmp: register forwarded to rev_l as its second operand (presumably a
//        scratch register that rev_l may clobber -- confirm against the
//        rev_l definition).
//
// On big endian builds (__ARMEB__ defined) this expands to a single rev_l
// invocation.  On little endian builds it expands to nothing, so neither
// \a nor \tmp is touched.
.macro _le32_bswap a, tmp
#ifdef __ARMEB__
rev_l \a, \tmp
#endif
.endm
// Apply _le32_bswap to eight registers in turn (\a first, \h last), handing
// the same \tmp register to every invocation.  Whatever _le32_bswap expands
// to for one register is emitted once per listed register, so on builds
// where _le32_bswap is empty this macro is empty as well.
//
//   a-h: registers holding the eight words to convert, each updated in place.
//   tmp: register passed through unchanged as _le32_bswap's \tmp argument.
.macro _le32_bswap_8x a, b, c, d, e, f, g, h, tmp
	.irp	word, \a, \b, \c, \d, \e, \f, \g, \h
	_le32_bswap	\word, \tmp
	.endr
.endm
// Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals. // Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals.
// (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two // (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two
// columns/diagonals. s0-s1 are the word offsets to the message words the first // columns/diagonals. s0-s1 are the word offsets to the message words the first
@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch)
tst r1, #3 tst r1, #3
bne .Lcopy_block_misaligned bne .Lcopy_block_misaligned
ldmia r1!, {r2-r9} ldmia r1!, {r2-r9}
_le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12!, {r2-r9} stmia r12!, {r2-r9}
ldmia r1!, {r2-r9} ldmia r1!, {r2-r9}
_le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12, {r2-r9} stmia r12, {r2-r9}
.Lcopy_block_done: .Lcopy_block_done:
str r1, [sp, #68] // Update message pointer str r1, [sp, #68] // Update message pointer
@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch)
1: 1:
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
ldr r3, [r1], #4 ldr r3, [r1], #4
_le32_bswap r3, r4
#else #else
ldrb r3, [r1, #0] ldrb r3, [r1, #0]
ldrb r4, [r1, #1] ldrb r4, [r1, #1]