mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-11 23:50:25 +00:00
1370e97bb2
The sequence lock (seqlock) was originally designed for the cases where the readers do not need to block the writers by making the readers retry the read operation when the data change. Since then, the use cases have been expanded to include situations where a thread does not need to change the data (effectively a reader) at all but has to take the writer lock because it can't tolerate changes to the protected structure. Some examples are the d_path() function and the getcwd() syscall in fs/dcache.c where the functions take the writer lock on rename_lock even though they don't need to change anything in the protected data structure at all. This is inefficient as a reader is now blocking other sequence number reading readers from moving forward by pretending to be a writer. This patch tries to eliminate this inefficiency by introducing a new type of locking reader to the seqlock locking mechanism. This new locking reader will try to take an exclusive lock preventing other writers and locking readers from going forward. However, it won't affect the progress of the other sequence number reading readers as the sequence number won't be changed. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
330 lines
8.6 KiB
C
330 lines
8.6 KiB
C
#ifndef __LINUX_SEQLOCK_H
|
|
#define __LINUX_SEQLOCK_H
|
|
/*
|
|
* Reader/writer consistent mechanism without starving writers. This type of
|
|
* lock is for data where the reader wants a consistent set of information
|
|
* and is willing to retry if the information changes. There are two types
|
|
* of readers:
|
|
* 1. Sequence readers which never block a writer but they may have to retry
|
|
* if a writer is in progress by detecting change in sequence number.
|
|
* Writers do not wait for a sequence reader.
|
|
* 2. Locking readers which will wait if a writer or another locking reader
|
|
* is in progress. A locking reader in progress will also block a writer
|
|
* from going forward. Unlike the regular rwlock, the read lock here is
|
|
* exclusive so that only one locking reader can get it.
|
|
*
|
|
* This is not as cache friendly as brlock. Also, this may not work well
|
|
* for data that contains pointers, because any writer could
|
|
* invalidate a pointer that a reader was following.
|
|
*
|
|
* Expected non-blocking reader usage:
|
|
* do {
|
|
* seq = read_seqbegin(&foo);
|
|
* ...
|
|
* } while (read_seqretry(&foo, seq));
|
|
*
|
|
*
|
|
* On non-SMP the spin locks disappear but the writer still needs
|
|
* to increment the sequence variables because an interrupt routine could
|
|
* change the state of the data.
|
|
*
|
|
* Based on x86_64 vsyscall gettimeofday
|
|
* by Keith Owens and Andrea Arcangeli
|
|
*/
|
|
|
|
#include <linux/spinlock.h>
|
|
#include <linux/preempt.h>
|
|
#include <asm/processor.h>
|
|
|
|
/*
|
|
* Version using sequence counter only.
|
|
* This can be used when code has its own mutex protecting the
|
|
* updating starting before the write_seqcount_begin() and ending
|
|
* after the write_seqcount_end().
|
|
*/
|
|
/* Bare sequence counter; it carries no lock of its own. */
typedef struct seqcount {
	unsigned sequence;
} seqcount_t;

/* Static initializer for a seqcount_t: the count starts at zero. */
#define SEQCNT_ZERO { .sequence = 0 }

/* Run-time (re)initialization of the seqcount_t that @x points to. */
#define seqcount_init(x)				\
	do {						\
		*(x) = (seqcount_t) SEQCNT_ZERO;	\
	} while (0)
|
|
|
|
/**
|
|
* __read_seqcount_begin - begin a seq-read critical section (without barrier)
|
|
* @s: pointer to seqcount_t
|
|
* Returns: count to be passed to read_seqcount_retry
|
|
*
|
|
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
|
|
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
|
|
* provided before actually loading any of the variables that are to be
|
|
* protected in this critical section.
|
|
*
|
|
* Use carefully, only in critical code, and comment how the barrier is
|
|
* provided.
|
|
*/
|
|
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
|
|
{
|
|
unsigned ret;
|
|
|
|
repeat:
|
|
ret = ACCESS_ONCE(s->sequence);
|
|
if (unlikely(ret & 1)) {
|
|
cpu_relax();
|
|
goto repeat;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* read_seqcount_begin - begin a seq-read critical section
|
|
* @s: pointer to seqcount_t
|
|
* Returns: count to be passed to read_seqcount_retry
|
|
*
|
|
* read_seqcount_begin opens a read critical section of the given seqcount.
|
|
* Validity of the critical section is tested by checking read_seqcount_retry
|
|
* function.
|
|
*/
|
|
static inline unsigned read_seqcount_begin(const seqcount_t *s)
|
|
{
|
|
unsigned ret = __read_seqcount_begin(s);
|
|
smp_rmb();
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* raw_seqcount_begin - begin a seq-read critical section
|
|
* @s: pointer to seqcount_t
|
|
* Returns: count to be passed to read_seqcount_retry
|
|
*
|
|
* raw_seqcount_begin opens a read critical section of the given seqcount.
|
|
* Validity of the critical section is tested by checking read_seqcount_retry
|
|
* function.
|
|
*
|
|
* Unlike read_seqcount_begin(), this function will not wait for the count
|
|
* to stabilize. If a writer is active when we begin, we will fail the
|
|
* read_seqcount_retry() instead of stabilizing at the beginning of the
|
|
* critical section.
|
|
*/
|
|
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
|
|
{
|
|
unsigned ret = ACCESS_ONCE(s->sequence);
|
|
smp_rmb();
|
|
return ret & ~1;
|
|
}
|
|
|
|
/**
|
|
* __read_seqcount_retry - end a seq-read critical section (without barrier)
|
|
* @s: pointer to seqcount_t
|
|
* @start: count, from read_seqcount_begin
|
|
* Returns: 1 if retry is required, else 0
|
|
*
|
|
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
|
|
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
|
|
* provided before actually loading any of the variables that are to be
|
|
* protected in this critical section.
|
|
*
|
|
* Use carefully, only in critical code, and comment how the barrier is
|
|
* provided.
|
|
*/
|
|
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
|
|
{
|
|
return unlikely(s->sequence != start);
|
|
}
|
|
|
|
/**
|
|
* read_seqcount_retry - end a seq-read critical section
|
|
* @s: pointer to seqcount_t
|
|
* @start: count, from read_seqcount_begin
|
|
* Returns: 1 if retry is required, else 0
|
|
*
|
|
* read_seqcount_retry closes a read critical section of the given seqcount.
|
|
* If the critical section was invalid, it must be ignored (and typically
|
|
* retried).
|
|
*/
|
|
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
|
|
{
|
|
smp_rmb();
|
|
return __read_seqcount_retry(s, start);
|
|
}
|
|
|
|
|
|
/*
|
|
* Sequence counter only version assumes that callers are using their
|
|
* own mutexing.
|
|
*/
|
|
static inline void write_seqcount_begin(seqcount_t *s)
|
|
{
|
|
s->sequence++;
|
|
smp_wmb();
|
|
}
|
|
|
|
/*
 * write_seqcount_end - close a write section opened by
 * write_seqcount_begin(): issue smp_wmb(), then bump the count again.
 */
static inline void write_seqcount_end(seqcount_t *s)
{
	/* Barrier between the caller's data stores and the count update. */
	smp_wmb();
	s->sequence += 1;
}
|
|
|
|
/**
|
|
* write_seqcount_barrier - invalidate in-progress read-side seq operations
|
|
* @s: pointer to seqcount_t
|
|
*
|
|
* After write_seqcount_barrier, no read-side seq operations will complete
|
|
* successfully and see data older than this.
|
|
*/
|
|
static inline void write_seqcount_barrier(seqcount_t *s)
|
|
{
|
|
smp_wmb();
|
|
s->sequence+=2;
|
|
}
|
|
|
|
typedef struct {
|
|
struct seqcount seqcount;
|
|
spinlock_t lock;
|
|
} seqlock_t;
|
|
|
|
/*
 * Static initializer for a seqlock_t named @lockname: zeroed sequence
 * count and an unlocked spinlock.
 *
 * These macros triggered gcc-3.x compile-time problems. We think these are
 * OK now. Be cautious.
 */
#define __SEQLOCK_UNLOCKED(lockname)				\
	{							\
		.seqcount = SEQCNT_ZERO,			\
		.lock = __SPIN_LOCK_UNLOCKED(lockname)		\
	}
|
|
|
|
/* Run-time initialization of the seqlock_t that @x points to. */
#define seqlock_init(x)						\
	do {							\
		seqcount_init(&(x)->seqcount);			\
		spin_lock_init(&(x)->lock);			\
	} while (0)
|
|
|
|
/* Define a seqlock_t variable named @x, statically initialized via __SEQLOCK_UNLOCKED(). */
#define DEFINE_SEQLOCK(x)	seqlock_t x = __SEQLOCK_UNLOCKED(x)
|
|
|
|
/*
|
|
* Read side functions for starting and finalizing a read side section.
|
|
*/
|
|
static inline unsigned read_seqbegin(const seqlock_t *sl)
|
|
{
|
|
return read_seqcount_begin(&sl->seqcount);
|
|
}
|
|
|
|
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
|
|
{
|
|
return read_seqcount_retry(&sl->seqcount, start);
|
|
}
|
|
|
|
/*
|
|
* Lock out other writers and update the count.
|
|
* Acts like a normal spin_lock/unlock.
|
|
* Don't need preempt_disable() because that is in the spin_lock already.
|
|
*/
|
|
static inline void write_seqlock(seqlock_t *sl)
|
|
{
|
|
spin_lock(&sl->lock);
|
|
write_seqcount_begin(&sl->seqcount);
|
|
}
|
|
|
|
/*
 * write_sequnlock - counterpart of write_seqlock(): close the write section
 * on the embedded counter, then release sl->lock.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
|
|
|
|
/*
 * write_seqlock_bh - like write_seqlock(), but sl->lock is taken with
 * spin_lock_bh().
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}
|
|
|
|
/*
 * write_sequnlock_bh - counterpart of write_seqlock_bh(): close the write
 * section, then release sl->lock with spin_unlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}
|
|
|
|
/*
 * write_seqlock_irq - like write_seqlock(), but sl->lock is taken with
 * spin_lock_irq().
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}
|
|
|
|
/*
 * write_sequnlock_irq - counterpart of write_seqlock_irq(): close the write
 * section, then release sl->lock with spin_unlock_irq().
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}
|
|
|
|
/*
 * __write_seqlock_irqsave - function backend of write_seqlock_irqsave().
 *
 * Takes sl->lock with spin_lock_irqsave(), opens the write section on the
 * embedded counter and returns the flags value that spin_lock_irqsave()
 * stored, for later use with write_sequnlock_irqrestore().
 */
static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long irq_state;

	spin_lock_irqsave(&sl->lock, irq_state);
	write_seqcount_begin(&sl->seqcount);

	return irq_state;
}
|
|
|
|
/*
 * write_seqlock_irqsave - writer lock via __write_seqlock_irqsave(); the
 * value it returns is stored into @flags, which must be an lvalue.
 */
#define write_seqlock_irqsave(lock, flags)			\
	do {							\
		flags = __write_seqlock_irqsave(lock);		\
	} while (0)
|
|
|
|
static inline void
|
|
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
|
|
{
|
|
write_seqcount_end(&sl->seqcount);
|
|
spin_unlock_irqrestore(&sl->lock, flags);
|
|
}
|
|
|
|
/*
|
|
* A locking reader exclusively locks out other writers and locking readers,
|
|
* but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
|
|
* Don't need preempt_disable() because that is in the spin_lock already.
|
|
*/
|
|
static inline void read_seqlock_excl(seqlock_t *sl)
|
|
{
|
|
spin_lock(&sl->lock);
|
|
}
|
|
|
|
/*
 * read_sequnlock_excl - counterpart of read_seqlock_excl(): release
 * sl->lock; the sequence count is not touched.
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}
|
|
|
|
/*
 * read_seqlock_excl_bh - locking-reader lock; sl->lock is taken with
 * spin_lock_bh() and the sequence count is not touched.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}
|
|
|
|
/*
 * read_sequnlock_excl_bh - counterpart of read_seqlock_excl_bh(): release
 * sl->lock with spin_unlock_bh().
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}
|
|
|
|
/*
 * read_seqlock_excl_irq - locking-reader lock; sl->lock is taken with
 * spin_lock_irq() and the sequence count is not touched.
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}
|
|
|
|
/*
 * read_sequnlock_excl_irq - counterpart of read_seqlock_excl_irq(): release
 * sl->lock with spin_unlock_irq().
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}
|
|
|
|
/*
 * __read_seqlock_excl_irqsave - function backend of
 * read_seqlock_excl_irqsave().
 *
 * Takes sl->lock with spin_lock_irqsave() and returns the flags value it
 * stored, for later use with read_sequnlock_excl_irqrestore(). The sequence
 * count is not touched.
 */
static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long irq_state;

	spin_lock_irqsave(&sl->lock, irq_state);

	return irq_state;
}
|
|
|
|
/*
 * read_seqlock_excl_irqsave - locking-reader lock via
 * __read_seqlock_excl_irqsave(); the value it returns is stored into
 * @flags, which must be an lvalue.
 */
#define read_seqlock_excl_irqsave(lock, flags)			\
	do {							\
		flags = __read_seqlock_excl_irqsave(lock);	\
	} while (0)
|
|
|
|
static inline void
|
|
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
|
|
{
|
|
spin_unlock_irqrestore(&sl->lock, flags);
|
|
}
|
|
|
|
#endif /* __LINUX_SEQLOCK_H */
|