mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-28 16:53:49 +00:00
0a571b085f
When building mpc885_ads_defconfig with gcc 10.1,
the function get_order() appears 50 times in vmlinux:
[linux]# ppc-linux-objdump -x vmlinux | grep get_order | wc -l
50
[linux]# size vmlinux
text data bss dec hex filename
3842620 675624 135160 4653404 47015c vmlinux
In the old days, marking a function 'static inline' forced GCC to
inline it, but since commit ac7c3e4ff4
("compiler: enable
CONFIG_OPTIMIZE_INLINING forcibly") GCC may decide not to inline a
function.
It looks like GCC 10 is making poor decisions on this.
get_order() compiles into the following tiny function, occupying 20
bytes of text.
0000007c <get_order>:
7c: 38 63 ff ff addi r3,r3,-1
80: 54 63 a3 3e rlwinm r3,r3,20,12,31
84: 7c 63 00 34 cntlzw r3,r3
88: 20 63 00 20 subfic r3,r3,32
8c: 4e 80 00 20 blr
By forcing get_order() to be __always_inline, the size of text is
reduced by 1940 bytes, which is almost twice the space occupied by
the 50 copies of get_order():
[linux-powerpc]# size vmlinux
text data bss dec hex filename
3840680 675588 135176 4651444 46f9b4 vmlinux
Link: https://lkml.kernel.org/r/96c6172d619c51acc5c1c4884b80785c59af4102.1602949927.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
53 lines
1.2 KiB
C
53 lines
1.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __ASM_GENERIC_GETORDER_H
|
|
#define __ASM_GENERIC_GETORDER_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/compiler.h>
|
|
#include <linux/log2.h>
|
|
|
|
/**
|
|
* get_order - Determine the allocation order of a memory size
|
|
* @size: The size for which to get the order
|
|
*
|
|
* Determine the allocation order of a particular sized block of memory. This
|
|
* is on a logarithmic scale, where:
|
|
*
|
|
* 0 -> 2^0 * PAGE_SIZE and below
|
|
* 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
|
|
* 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
|
|
* 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
|
|
* 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
|
|
* ...
|
|
*
|
|
* The order returned is used to find the smallest allocation granule required
|
|
* to hold an object of the specified size.
|
|
*
|
|
* The result is undefined if the size is 0.
|
|
*/
|
|
static __always_inline __attribute_const__ int get_order(unsigned long size)
|
|
{
|
|
if (__builtin_constant_p(size)) {
|
|
if (!size)
|
|
return BITS_PER_LONG - PAGE_SHIFT;
|
|
|
|
if (size < (1UL << PAGE_SHIFT))
|
|
return 0;
|
|
|
|
return ilog2((size) - 1) - PAGE_SHIFT + 1;
|
|
}
|
|
|
|
size--;
|
|
size >>= PAGE_SHIFT;
|
|
#if BITS_PER_LONG == 32
|
|
return fls(size);
|
|
#else
|
|
return fls64(size);
|
|
#endif
|
|
}
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __ASM_GENERIC_GETORDER_H */
|