mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 13:43:51 +00:00
a69cb445f7
The ARM version of the accelerated XOR routines is simply the 8-way C routines passed through the auto-vectorizer with SIMD codegen enabled. This used to require at least GCC version 4.6, but given that 5.1 is now the baseline, this check is no longer necessary. It also misidentifies Clang as GCC < 4.6, because Clang defines the GCC major/minor version macros as well, but makes no attempt at doing this in a way that conveys feature parity with a certain version of GCC (which would not be a great idea in the first place).

So let's drop the version check, and make the auto-vectorize pragma (which is based on a GCC-specific command line option) GCC-only. Since Clang performs SIMD auto-vectorization by default at -O2, no pragma is necessary here.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/496
Link: https://github.com/ClangBuiltLinux/linux/issues/503
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
38 lines
939 B
C
38 lines
939 B
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* linux/arch/arm/lib/xor-neon.c
|
|
*
|
|
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
|
*/
|
|
|
|
#include <linux/raid/xor.h>
|
|
#include <linux/module.h>
|
|
|
|
MODULE_LICENSE("GPL");

/*
 * Refuse to build unless the compiler has NEON code generation enabled
 * for this translation unit.
 */
#if !defined(__ARM_NEON__)
#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
#endif
|
|
|
|
/*
 * The reference XOR implementations are pulled in further down. When
 * building with GCC, ask for auto-vectorization (the -ftree-vectorize
 * behaviour) so the compiler attempts to exploit their implicit
 * parallelism and emit NEON instructions. Clang already vectorizes at
 * O2, so it needs no pragma.
 */
#if defined(CONFIG_CC_IS_GCC)
#pragma GCC optimize "tree-vectorize"
#endif
|
|
|
|
/*
 * Silence -Wunused-variable for the rest of this file. Presumably the
 * header included next defines objects that this file never references
 * (TODO confirm against that header).
 */
#pragma GCC diagnostic ignored "-Wunused-variable"
|
|
#include <asm-generic/xor.h>
|
|
|
|
struct xor_block_template const xor_block_neon_inner = {
|
|
.name = "__inner_neon__",
|
|
.do_2 = xor_8regs_2,
|
|
.do_3 = xor_8regs_3,
|
|
.do_4 = xor_8regs_4,
|
|
.do_5 = xor_8regs_5,
|
|
};
|
|
EXPORT_SYMBOL(xor_block_neon_inner);
|