mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 18:52:02 +00:00
lib/sort: avoid indirect calls to built-in swap
Similar to what's being done in the net code, this takes advantage of the fact that most invocations use only a few common swap functions, and replaces indirect calls to them with (highly predictable) conditional branches. (The downside, of course, is that if you *do* use a custom swap function, there are a few extra predicted branches on the code path.)

This actually *shrinks* the x86-64 code, because it inlines the various swap functions inside do_swap, eliding function prologues & epilogues.

x86-64 code size 767 -> 703 bytes (-64)

Link: http://lkml.kernel.org/r/d10c5d4b393a1847f32f5b26f4bbaa2857140e1e.1552704200.git.lkml@sdf.org
Signed-off-by: George Spelvin <lkml@sdf.org>
Acked-by: Andrey Abramov <st5pub@yandex.ru>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Daniel Wagner <daniel.wagner@siemens.com>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Don Mullis <don.mullis@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
22a241ccb2
commit
8fb583c425
51
lib/sort.c
51
lib/sort.c
@ -54,10 +54,8 @@ static bool is_aligned(const void *base, size_t size, unsigned char align)
|
|||||||
* subtract (since the intervening mov instructions don't alter the flags).
|
* subtract (since the intervening mov instructions don't alter the flags).
|
||||||
* Gcc 8.1.0 doesn't have that problem.
|
* Gcc 8.1.0 doesn't have that problem.
|
||||||
*/
|
*/
|
||||||
static void swap_words_32(void *a, void *b, int size)
|
static void swap_words_32(void *a, void *b, size_t n)
|
||||||
{
|
{
|
||||||
size_t n = (unsigned int)size;
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
u32 t = *(u32 *)(a + (n -= 4));
|
u32 t = *(u32 *)(a + (n -= 4));
|
||||||
*(u32 *)(a + n) = *(u32 *)(b + n);
|
*(u32 *)(a + n) = *(u32 *)(b + n);
|
||||||
@ -80,10 +78,8 @@ static void swap_words_32(void *a, void *b, int size)
|
|||||||
* but it's possible to have 64-bit loads without 64-bit pointers (e.g.
|
* but it's possible to have 64-bit loads without 64-bit pointers (e.g.
|
||||||
* x32 ABI). Are there any cases the kernel needs to worry about?
|
* x32 ABI). Are there any cases the kernel needs to worry about?
|
||||||
*/
|
*/
|
||||||
static void swap_words_64(void *a, void *b, int size)
|
static void swap_words_64(void *a, void *b, size_t n)
|
||||||
{
|
{
|
||||||
size_t n = (unsigned int)size;
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
u64 t = *(u64 *)(a + (n -= 8));
|
u64 t = *(u64 *)(a + (n -= 8));
|
||||||
@ -109,10 +105,8 @@ static void swap_words_64(void *a, void *b, int size)
|
|||||||
*
|
*
|
||||||
* This is the fallback if alignment doesn't allow using larger chunks.
|
* This is the fallback if alignment doesn't allow using larger chunks.
|
||||||
*/
|
*/
|
||||||
static void swap_bytes(void *a, void *b, int size)
|
static void swap_bytes(void *a, void *b, size_t n)
|
||||||
{
|
{
|
||||||
size_t n = (unsigned int)size;
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
char t = ((char *)a)[--n];
|
char t = ((char *)a)[--n];
|
||||||
((char *)a)[n] = ((char *)b)[n];
|
((char *)a)[n] = ((char *)b)[n];
|
||||||
@ -120,6 +114,33 @@ static void swap_bytes(void *a, void *b, int size)
|
|||||||
} while (n);
|
} while (n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef void (*swap_func_t)(void *a, void *b, int size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The values are arbitrary as long as they can't be confused with
|
||||||
|
* a pointer, but small integers make for the smallest compare
|
||||||
|
* instructions.
|
||||||
|
*/
|
||||||
|
#define SWAP_WORDS_64 (swap_func_t)0
|
||||||
|
#define SWAP_WORDS_32 (swap_func_t)1
|
||||||
|
#define SWAP_BYTES (swap_func_t)2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The function pointer is last to make tail calls most efficient if the
|
||||||
|
* compiler decides not to inline this function.
|
||||||
|
*/
|
||||||
|
static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
|
||||||
|
{
|
||||||
|
if (swap_func == SWAP_WORDS_64)
|
||||||
|
swap_words_64(a, b, size);
|
||||||
|
else if (swap_func == SWAP_WORDS_32)
|
||||||
|
swap_words_32(a, b, size);
|
||||||
|
else if (swap_func == SWAP_BYTES)
|
||||||
|
swap_bytes(a, b, size);
|
||||||
|
else
|
||||||
|
swap_func(a, b, (int)size);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* parent - given the offset of the child, find the offset of the parent.
|
* parent - given the offset of the child, find the offset of the parent.
|
||||||
* @i: the offset of the heap element whose parent is sought. Non-zero.
|
* @i: the offset of the heap element whose parent is sought. Non-zero.
|
||||||
@ -157,7 +178,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
|
|||||||
* This function does a heapsort on the given array. You may provide
|
* This function does a heapsort on the given array. You may provide
|
||||||
* a swap_func function if you need to do something more than a memory
|
* a swap_func function if you need to do something more than a memory
|
||||||
* copy (e.g. fix up pointers or auxiliary data), but the built-in swap
|
* copy (e.g. fix up pointers or auxiliary data), but the built-in swap
|
||||||
* isn't usually a bottleneck.
|
* avoids a slow retpoline and so is significantly faster.
|
||||||
*
|
*
|
||||||
* Sorting time is O(n log n) both on average and worst-case. While
|
* Sorting time is O(n log n) both on average and worst-case. While
|
||||||
* quicksort is slightly faster on average, it suffers from exploitable
|
* quicksort is slightly faster on average, it suffers from exploitable
|
||||||
@ -177,11 +198,11 @@ void sort(void *base, size_t num, size_t size,
|
|||||||
|
|
||||||
if (!swap_func) {
|
if (!swap_func) {
|
||||||
if (is_aligned(base, size, 8))
|
if (is_aligned(base, size, 8))
|
||||||
swap_func = swap_words_64;
|
swap_func = SWAP_WORDS_64;
|
||||||
else if (is_aligned(base, size, 4))
|
else if (is_aligned(base, size, 4))
|
||||||
swap_func = swap_words_32;
|
swap_func = SWAP_WORDS_32;
|
||||||
else
|
else
|
||||||
swap_func = swap_bytes;
|
swap_func = SWAP_BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -197,7 +218,7 @@ void sort(void *base, size_t num, size_t size,
|
|||||||
if (a) /* Building heap: sift down --a */
|
if (a) /* Building heap: sift down --a */
|
||||||
a -= size;
|
a -= size;
|
||||||
else if (n -= size) /* Sorting: Extract root to --n */
|
else if (n -= size) /* Sorting: Extract root to --n */
|
||||||
swap_func(base, base + n, size);
|
do_swap(base, base + n, size, swap_func);
|
||||||
else /* Sort complete */
|
else /* Sort complete */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -224,7 +245,7 @@ void sort(void *base, size_t num, size_t size,
|
|||||||
c = b; /* Where "a" belongs */
|
c = b; /* Where "a" belongs */
|
||||||
while (b != a) { /* Shift it into place */
|
while (b != a) { /* Shift it into place */
|
||||||
b = parent(b, lsbit, size);
|
b = parent(b, lsbit, size);
|
||||||
swap_func(base + b, base + c, size);
|
do_swap(base + b, base + c, size, swap_func);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user