Peter Zijlstra c427f69564 locking/mutex: Optimize __mutex_trylock_fast()
Use try_cmpxchg to avoid the pointless TEST instruction..
And add the (missing) atomic_long_try_cmpxchg*() wrappery.

On x86_64 this gives:

0000000000000710 <mutex_lock>:						0000000000000710 <mutex_lock>:
 710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx                      710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
 717:   00 00                                                            717:   00 00
                        715: R_X86_64_32S       current_task                                    715: R_X86_64_32S       current_task
 719:   31 c0                   xor    %eax,%eax                         719:   31 c0                   xor    %eax,%eax
 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)                 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
 720:   48 85 c0                test   %rax,%rax                         720:   75 02                   jne    724 <mutex_lock+0x14>
 723:   75 02                   jne    727 <mutex_lock+0x17>             722:   f3 c3                   repz retq
 725:   f3 c3                   repz retq                                724:   eb da                   jmp    700 <__mutex_lock_slowpath>
 727:   eb d7                   jmp    700 <__mutex_lock_slowpath>       726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
 729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)                         72d:   00 00 00

On ARM64 this gives:

000000000000638 <mutex_lock>:						0000000000000638 <mutex_lock>:
     638:       d5384101        mrs     x1, sp_el0                           638:       d5384101        mrs     x1, sp_el0
     63c:       d2800002        mov     x2, #0x0                             63c:       d2800002        mov     x2, #0x0
     640:       f9800011        prfm    pstl1strm, [x0]                      640:       f9800011        prfm    pstl1strm, [x0]
     644:       c85ffc03        ldaxr   x3, [x0]                             644:       c85ffc03        ldaxr   x3, [x0]
     648:       ca020064        eor     x4, x3, x2                           648:       ca020064        eor     x4, x3, x2
     64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>            64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>
     650:       c8047c01        stxr    w4, x1, [x0]                         650:       c8047c01        stxr    w4, x1, [x0]
     654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>             654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
     658:       b40000c3        cbz     x3, 670 <mutex_lock+0x38>            658:       b5000043        cbnz    x3, 660 <mutex_lock+0x28>
     65c:       a9bf7bfd        stp     x29, x30, [sp,#-16]!                 65c:       d65f03c0        ret
     660:       910003fd        mov     x29, sp                              660:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
     664:       97ffffef        bl      620 <__mutex_lock_slowpath>          664:       910003fd        mov     x29, sp
     668:       a8c17bfd        ldp     x29, x30, [sp],#16                   668:       97ffffee        bl      620 <__mutex_lock_slowpath>
     66c:       d65f03c0        ret                                          66c:       a8c17bfd        ldp     x29, x30, [sp],#16
     670:       d65f03c0        ret                                          670:       d65f03c0        ret

Reported-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-05-04 10:02:39 +02:00

270 lines
7.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_ATOMIC_LONG_H
#define _ASM_GENERIC_ATOMIC_LONG_H
/*
* Copyright (C) 2005 Silicon Graphics, Inc.
* Christoph Lameter
*
* Allows to provide arch independent atomic definitions without the need to
* edit all arch specific atomic.h files.
*/
#include <asm/types.h>
/*
* Suppport for atomic_long_t
*
* Casts for parameters are avoided for existing atomic functions in order to
* avoid issues with cast-as-lval under gcc 4.x and other limitations that the
* macros of a platform may have.
*/
#if BITS_PER_LONG == 64
typedef atomic64_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic64 ## x
#define ATOMIC_LONG_TYPE s64
#else
typedef atomic_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic ## x
#define ATOMIC_LONG_TYPE int
#endif
#define ATOMIC_LONG_READ_OP(mo) \
static inline long atomic_long_read##mo(const atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_read##mo)(v); \
}
ATOMIC_LONG_READ_OP()
ATOMIC_LONG_READ_OP(_acquire)
#undef ATOMIC_LONG_READ_OP
#define ATOMIC_LONG_SET_OP(mo) \
static inline void atomic_long_set##mo(atomic_long_t *l, long i) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
ATOMIC_LONG_PFX(_set##mo)(v, i); \
}
ATOMIC_LONG_SET_OP()
ATOMIC_LONG_SET_OP(_release)
#undef ATOMIC_LONG_SET_OP
#define ATOMIC_LONG_ADD_SUB_OP(op, mo) \
static inline long \
atomic_long_##op##_return##mo(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v); \
}
ATOMIC_LONG_ADD_SUB_OP(add,)
ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
ATOMIC_LONG_ADD_SUB_OP(add, _release)
ATOMIC_LONG_ADD_SUB_OP(sub,)
ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
ATOMIC_LONG_ADD_SUB_OP(sub, _release)
#undef ATOMIC_LONG_ADD_SUB_OP
#define atomic_long_cmpxchg_relaxed(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg_acquire(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg_release(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg_acquire(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg_release(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_try_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
#define atomic_long_xchg_relaxed(v, new) \
(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_acquire(v, new) \
(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_release(v, new) \
(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg(v, new) \
(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
static __always_inline void atomic_long_inc(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_PFX(_inc)(v);
}
static __always_inline void atomic_long_dec(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_PFX(_dec)(v);
}
#define ATOMIC_LONG_FETCH_OP(op, mo) \
static inline long \
atomic_long_fetch_##op##mo(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(i, v); \
}
ATOMIC_LONG_FETCH_OP(add, )
ATOMIC_LONG_FETCH_OP(add, _relaxed)
ATOMIC_LONG_FETCH_OP(add, _acquire)
ATOMIC_LONG_FETCH_OP(add, _release)
ATOMIC_LONG_FETCH_OP(sub, )
ATOMIC_LONG_FETCH_OP(sub, _relaxed)
ATOMIC_LONG_FETCH_OP(sub, _acquire)
ATOMIC_LONG_FETCH_OP(sub, _release)
ATOMIC_LONG_FETCH_OP(and, )
ATOMIC_LONG_FETCH_OP(and, _relaxed)
ATOMIC_LONG_FETCH_OP(and, _acquire)
ATOMIC_LONG_FETCH_OP(and, _release)
ATOMIC_LONG_FETCH_OP(andnot, )
ATOMIC_LONG_FETCH_OP(andnot, _relaxed)
ATOMIC_LONG_FETCH_OP(andnot, _acquire)
ATOMIC_LONG_FETCH_OP(andnot, _release)
ATOMIC_LONG_FETCH_OP(or, )
ATOMIC_LONG_FETCH_OP(or, _relaxed)
ATOMIC_LONG_FETCH_OP(or, _acquire)
ATOMIC_LONG_FETCH_OP(or, _release)
ATOMIC_LONG_FETCH_OP(xor, )
ATOMIC_LONG_FETCH_OP(xor, _relaxed)
ATOMIC_LONG_FETCH_OP(xor, _acquire)
ATOMIC_LONG_FETCH_OP(xor, _release)
#undef ATOMIC_LONG_FETCH_OP
#define ATOMIC_LONG_FETCH_INC_DEC_OP(op, mo) \
static inline long \
atomic_long_fetch_##op##mo(atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(v); \
}
ATOMIC_LONG_FETCH_INC_DEC_OP(inc,)
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _relaxed)
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _acquire)
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _release)
ATOMIC_LONG_FETCH_INC_DEC_OP(dec,)
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _relaxed)
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _acquire)
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _release)
#undef ATOMIC_LONG_FETCH_INC_DEC_OP
#define ATOMIC_LONG_OP(op) \
static __always_inline void \
atomic_long_##op(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
ATOMIC_LONG_PFX(_##op)(i, v); \
}
ATOMIC_LONG_OP(add)
ATOMIC_LONG_OP(sub)
ATOMIC_LONG_OP(and)
ATOMIC_LONG_OP(andnot)
ATOMIC_LONG_OP(or)
ATOMIC_LONG_OP(xor)
#undef ATOMIC_LONG_OP
static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
}
static inline int atomic_long_dec_and_test(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_dec_and_test)(v);
}
static inline int atomic_long_inc_and_test(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_inc_and_test)(v);
}
static inline int atomic_long_add_negative(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_add_negative)(i, v);
}
#define ATOMIC_LONG_INC_DEC_OP(op, mo) \
static inline long \
atomic_long_##op##_return##mo(atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \
}
ATOMIC_LONG_INC_DEC_OP(inc,)
ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
ATOMIC_LONG_INC_DEC_OP(inc, _release)
ATOMIC_LONG_INC_DEC_OP(dec,)
ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
ATOMIC_LONG_INC_DEC_OP(dec, _release)
#undef ATOMIC_LONG_INC_DEC_OP
static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
}
#define atomic_long_inc_not_zero(l) \
ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
#define atomic_long_cond_read_relaxed(v, c) \
ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c))
#define atomic_long_cond_read_acquire(v, c) \
ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
#endif /* _ASM_GENERIC_ATOMIC_LONG_H */