From 4f3d1be4c2f8a22470f3625cbc778ba2e2130def Mon Sep 17 00:00:00 2001
From: Vincent Mailhol
Date: Thu, 14 Nov 2024 02:18:32 +0900
Subject: [PATCH 1/4] compiler.h: add const_true()

__builtin_constant_p() is known for not always being able to produce a
constant expression [1], which led to the introduction of
__is_constexpr() [2]. Because of its dependency on
__builtin_constant_p(), statically_true() suffers from the same issues.
For example:

  void foo(int a)
  {
          /* fail on GCC */
          BUILD_BUG_ON_ZERO(statically_true(a));

          /* fail on both clang and GCC */
          static char arr[statically_true(a) ? 1 : 2];
  }

For the same reasons that __is_constexpr() was created to cover the
__builtin_constant_p() edge cases, it can be used to resolve the
limitations of statically_true().

Note that, somehow, GCC is not always able to fold this:

  __is_constexpr(x) && (x)

It is OK in BUILD_BUG_ON_ZERO() but not in array declarations or in
static_assert():

  void bar(int a)
  {
          /* success */
          BUILD_BUG_ON_ZERO(__is_constexpr(a) && (a));

          /* fail on GCC */
          static char arr[__is_constexpr(a) && (a) ? 1 : 2];

          /* fail on GCC */
          static_assert(__is_constexpr(a) && (a));
  }

Encapsulating the expression in a __builtin_choose_expr() switch
resolves all these failed tests.

Define a new const_true() macro which, by making use of the
__builtin_choose_expr() and __is_constexpr(x) combo, always produces a
constant expression.

It should be noted that statically_true() is the only one able to fold
tautological expressions in which at least one of the operands is not a
constant expression. For example:

  statically_true(true || var)
  statically_true(var == var)
  statically_true(var * 0 + 1)
  statically_true(!(var * 8 % 4))

always evaluate to true, whereas all of these would be false under
const_true() if var is not a constant expression [3].

For this reason, usage of const_true() should be the exception. Reflect
in the documentation that const_true() is less powerful and that
statically_true() is the overall preferred solution.

[1] __builtin_constant_p cannot resolve to const when optimizing
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449

[2] commit 3c8ba0d61d04 ("kernel.h: Retain constant expression output for max()/min()")
Link: https://git.kernel.org/torvalds/c/3c8ba0d61d04

[3] https://godbolt.org/z/c61PMxqbK

CC: Linus Torvalds
CC: Rasmus Villemoes
CC: Luc Van Oostenryck
Reviewed-by: Yury Norov
Signed-off-by: Vincent Mailhol
Signed-off-by: Yury Norov
---
 include/linux/compiler.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 240c632c5b95..bea92d20f9d2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -330,6 +330,28 @@ static inline void *offset_to_ptr(const int *off)
  */
 #define statically_true(x) (__builtin_constant_p(x) && (x))
 
+/*
+ * Similar to statically_true() but produces a constant expression
+ *
+ * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(),
+ * which require their input to be a constant expression and for which
+ * statically_true() would otherwise fail.
+ *
+ * This is a trade-off: const_true() requires all its operands to be
+ * compile time constants. Otherwise, it would always return false even
+ * in the most trivial cases like:
+ *
+ *   true || non_const_var
+ *
+ * By contrast, statically_true() is able to fold more complex
+ * tautologies and will return true on expressions such as:
+ *
+ *   !(non_const_var * 8 % 4)
+ *
+ * For the general case, statically_true() is better.
+ */
+#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false)
+
 /*
  * This is needed in functions which generate the stack canary, see
  * arch/x86/kernel/smpboot.c::start_secondary() for an example.
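
[ A minimal sketch, not part of the patch, of how the failing examples
  above behave once const_true() is available. It assumes a C file with
  linux/compiler.h and linux/build_bug.h included; baz() and its
  parameter are purely illustrative:

  void baz(int a)
  {
          /* All three build: const_true(a) is always a constant
           * expression (false here, since 'a' is not constant). */
          BUILD_BUG_ON_ZERO(const_true(a));
          static char arr[const_true(a) ? 1 : 2];
          static_assert(!const_true(a));
  }
]
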
From 4463a445a64b719e6f501d80dcc5872dde42eb73 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol
Date: Thu, 14 Nov 2024 02:18:33 +0900
Subject: [PATCH 2/4] linux/bits.h: simplify GENMASK_INPUT_CHECK()

In GENMASK_INPUT_CHECK(),

  __builtin_choose_expr(__is_constexpr((l) > (h)), (l) > (h), 0)

is the exact expansion of:

  const_true((l) > (h))

Apply const_true() to simplify GENMASK_INPUT_CHECK().

CC: Linus Torvalds
CC: Rasmus Villemoes
CC: Luc Van Oostenryck
Reviewed-by: Yury Norov
Signed-off-by: Vincent Mailhol
Signed-off-by: Yury Norov
---
 include/linux/bits.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/bits.h b/include/linux/bits.h
index 60044b608817..61a75d3f294b 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -20,9 +20,8 @@
  */
 #if !defined(__ASSEMBLY__)
 #include <linux/build_bug.h>
-#define GENMASK_INPUT_CHECK(h, l) \
-	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
-		__is_constexpr((l) > (h)), (l) > (h), 0)))
+#include <linux/compiler.h>
+#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
 #else
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
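
[ An illustrative sketch, not part of the patch, of what the input check
  still catches after the simplification: with constant bounds the
  comparison is a constant expression, so a reversed range is rejected at
  build time, while non-constant bounds make const_true() evaluate to
  false and the check is skipped. runtime_mask() is a made-up name:

  unsigned long ok = GENMASK(7, 0);        /* builds: h >= l            */
  /* unsigned long bad = GENMASK(0, 7); */ /* build error: l > h        */

  unsigned long runtime_mask(unsigned char h, unsigned char l)
  {
          return GENMASK(h, l);            /* builds: check is skipped  */
  }
]
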
From f9d2ee3f51d60100c10d4db64da4413f69e81993 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Sat, 23 Nov 2024 19:30:19 -0700
Subject: [PATCH 3/4] riscv: Always inline bitops

When building allmodconfig + ThinLTO with certain versions of clang,
arch_set_bit() may not be inlined, resulting in a modpost warning:

  WARNING: modpost: vmlinux: section mismatch in reference:
  arch_set_bit+0x58 (section: .text.arch_set_bit) ->
  numa_nodes_parsed (section: .init.data)

acpi_numa_rintc_affinity_init() calls arch_set_bit() via __node_set()
with numa_nodes_parsed, which is marked as __initdata. If arch_set_bit()
is not inlined, modpost will flag that it is being called with data that
will be freed after init. As acpi_numa_rintc_affinity_init() is marked
as __init, there is not actually a functional issue here. However, the
bitop functions should be marked as __always_inline, so that they work
consistently for init and non-init code, which the comment in
include/linux/nodemask.h alludes to. This matches s390 and x86's
implementations.

Signed-off-by: Nathan Chancellor
Signed-off-by: Yury Norov
---
 arch/riscv/include/asm/bitops.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index fae152ea0508..c6bd3d8354a9 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -228,7 +228,7 @@ legacy:
  *
  * This operation may be reordered on other architectures than x86.
  */
-static inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr)
+static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr)
 {
 	return __test_and_op_bit(or, __NOP, nr, addr);
 }
@@ -240,7 +240,7 @@ static inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr)
  *
  * This operation can be reordered on other architectures other than x86.
  */
-static inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
 	return __test_and_op_bit(and, __NOT, nr, addr);
 }
@@ -253,7 +253,7 @@ static inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr)
+static __always_inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr)
 {
 	return __test_and_op_bit(xor, __NOP, nr, addr);
 }
@@ -270,7 +270,7 @@ static inline int arch_test_and_change_bit(int nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void arch_set_bit(int nr, volatile unsigned long *addr)
+static __always_inline void arch_set_bit(int nr, volatile unsigned long *addr)
 {
 	__op_bit(or, __NOP, nr, addr);
 }
@@ -284,7 +284,7 @@ static inline void arch_set_bit(int nr, volatile unsigned long *addr)
  * on non x86 architectures, so if you are writing portable code,
  * make sure not to rely on its reordering guarantees.
  */
-static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline void arch_clear_bit(int nr, volatile unsigned long *addr)
 {
 	__op_bit(and, __NOT, nr, addr);
 }
@@ -298,7 +298,7 @@ static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void arch_change_bit(int nr, volatile unsigned long *addr)
+static __always_inline void arch_change_bit(int nr, volatile unsigned long *addr)
 {
 	__op_bit(xor, __NOP, nr, addr);
 }
@@ -311,7 +311,7 @@ static inline void arch_change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and provides acquire barrier semantics.
  * It can be used to implement bit locks.
  */
-static inline int arch_test_and_set_bit_lock(
+static __always_inline int arch_test_and_set_bit_lock(
 	unsigned long nr, volatile unsigned long *addr)
 {
 	return __test_and_op_bit_ord(or, __NOP, nr, addr, .aq);
@@ -324,7 +324,7 @@ static inline int arch_test_and_set_bit_lock(
  *
  * This operation is atomic and provides release barrier semantics.
  */
-static inline void arch_clear_bit_unlock(
+static __always_inline void arch_clear_bit_unlock(
 	unsigned long nr, volatile unsigned long *addr)
 {
 	__op_bit_ord(and, __NOT, nr, addr, .rl);
@@ -345,13 +345,13 @@ static inline void arch_clear_bit_unlock(
  * non-atomic property here: it's a lot more instructions and we still have to
  * provide release semantics anyway.
  */
-static inline void arch___clear_bit_unlock(
+static __always_inline void arch___clear_bit_unlock(
 	unsigned long nr, volatile unsigned long *addr)
 {
 	arch_clear_bit_unlock(nr, addr);
 }
 
-static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
 		volatile unsigned long *addr)
 {
 	unsigned long res;
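
[ A reduced, hypothetical sketch, not taken from the kernel sources, of
  the pattern behind the modpost warning: an out-of-line helper keeps a
  reference from its own .text section to .init.data, whereas
  __always_inline folds the access into the __init caller. It assumes
  linux/init.h and linux/nodemask.h; the names parsed_nodes,
  mark_node_parsed() and parse_first_node() are made up:

  static nodemask_t parsed_nodes __initdata;   /* freed after init */

  static __always_inline void mark_node_parsed(int nid)
  {
          /* inlined into the __init caller, so no reference is left
           * behind in a regular .text section */
          node_set(nid, parsed_nodes);
  }

  static int __init parse_first_node(void)
  {
          mark_node_parsed(0);
          return 0;
  }
]
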
From 7f15d4abf925f33015fb62973ce2ddb45ce04bb9 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Fri, 20 Dec 2024 16:57:39 +0000
Subject: [PATCH 4/4] cpu: Remove unused init_cpu_online

The last use of init_cpu_online() was removed by commit cf8e8658100d
("arch: Remove Itanium (IA-64) architecture").

Remove it.

Signed-off-by: Dr. David Alan Gilbert
Signed-off-by: Yury Norov
---
 include/linux/cpumask.h | 1 -
 kernel/cpu.c            | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9278a50d514f..590d8438514c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1043,7 +1043,6 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 /* Wrappers for arch boot code to manipulate normally-constant masks */
 void init_cpu_present(const struct cpumask *src);
 void init_cpu_possible(const struct cpumask *src);
-void init_cpu_online(const struct cpumask *src);
 
 #define assign_cpu(cpu, mask, val)	\
 	assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val))

diff --git a/kernel/cpu.c b/kernel/cpu.c
index b605334f8ee6..2ac860df1ff0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3128,11 +3128,6 @@ void init_cpu_possible(const struct cpumask *src)
 	cpumask_copy(&__cpu_possible_mask, src);
 }
 
-void init_cpu_online(const struct cpumask *src)
-{
-	cpumask_copy(&__cpu_online_mask, src);
-}
-
 void set_cpu_online(unsigned int cpu, bool online)
 {
 	/*