2018-06-18 12:52:50 +10:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
2014-08-02 11:47:44 +02:00
|
|
|
/*
|
|
|
|
* Resizable, Scalable, Concurrent Hash Table
|
|
|
|
*
|
2016-09-19 19:00:09 +08:00
|
|
|
* Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
|
2015-03-24 20:42:19 +00:00
|
|
|
* Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
|
2014-08-02 11:47:44 +02:00
|
|
|
* Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
|
|
|
|
*
|
|
|
|
* Code partially derived from nft_hash
|
2015-03-20 21:57:06 +11:00
|
|
|
* Rewritten with rehash code from br_multicast plus single list
|
|
|
|
* pointer as suggested by Josh Triplett
|
2014-08-02 11:47:44 +02:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LINUX_RHASHTABLE_H
|
|
|
|
#define _LINUX_RHASHTABLE_H
|
|
|
|
|
2015-12-03 20:41:29 +08:00
|
|
|
#include <linux/err.h>
|
2015-03-20 18:18:45 -04:00
|
|
|
#include <linux/errno.h>
|
2015-03-24 00:50:21 +11:00
|
|
|
#include <linux/jhash.h>
|
2015-01-02 23:00:21 +01:00
|
|
|
#include <linux/list_nulls.h>
|
2015-01-02 23:00:20 +01:00
|
|
|
#include <linux/workqueue.h>
|
2017-02-04 01:27:20 +01:00
|
|
|
#include <linux/rculist.h>
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
#include <linux/bit_spinlock.h>
|
2014-08-02 11:47:44 +02:00
|
|
|
|
2018-06-18 12:52:50 +10:00
|
|
|
#include <linux/rhashtable-types.h>
|
2015-01-02 23:00:21 +01:00
|
|
|
/*
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* Objects in an rhashtable have an embedded struct rhash_head
|
|
|
|
* which is linked into a hash chain from the hash table - or one
|
|
|
|
* of two or more hash tables when the rhashtable is being resized.
|
2015-01-02 23:00:21 +01:00
|
|
|
* The end of the chain is marked with a special nulls marker which has
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* the least significant bit set but otherwise stores the address of
|
|
|
|
* the hash bucket. This allows us to be sure we've found the end
|
|
|
|
* of the right list.
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
* The value stored in the hash bucket has BIT(0) used as a lock bit.
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* This bit must be atomically set before any changes are made to
|
|
|
|
* the chain. To avoid dereferencing this pointer without clearing
|
|
|
|
* the bit first, we use an opaque 'struct rhash_lock_head *' for the
|
|
|
|
* pointer stored in the bucket. This struct needs to be defined so
|
2019-04-12 11:52:07 +10:00
|
|
|
* that rcu_dereference() works on it, but it has no content so a
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* cast is needed for it to be useful. This ensures it isn't
|
|
|
|
* used by mistake without clearing the lock bit first.
|
2015-01-02 23:00:21 +01:00
|
|
|
*/
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
/* Opaque, member-less type for the bucket-head pointer; cast before use. */
struct rhash_lock_head {};
|
2015-03-20 21:57:00 +11:00
|
|
|
|
2017-04-16 02:55:09 +02:00
|
|
|
/* Maximum chain length before rehash
 *
 * The maximum (not average) chain length grows with the size of the hash
 * table, at a rate of (log N)/(log log N).
 *
 * The value of 16 is selected so that even if the hash table grew to
 * 2^32 you would not expect the maximum chain length to exceed it
 * unless we are under attack (or extremely unlucky).
 *
 * As this limit is only to detect attacks, we don't need to set it to a
 * lower value as you'd need the chain length to vastly exceed 16 to have
 * any real effect on the system.
 */
#define RHT_ELASTICITY	16u
|
|
|
|
|
2015-01-02 23:00:20 +01:00
|
|
|
/**
|
|
|
|
* struct bucket_table - Table of hash buckets
|
|
|
|
* @size: Number of hash buckets
|
2017-02-11 19:26:47 +08:00
|
|
|
* @nest: Number of bits of first-level nested table.
|
2015-03-14 13:57:24 +11:00
|
|
|
* @rehash: Current bucket being rehashed
|
2015-03-10 09:27:55 +11:00
|
|
|
* @hash_rnd: Random seed to fold into hash
|
2015-03-14 13:57:20 +11:00
|
|
|
* @walkers: List of active walkers
|
2015-03-14 13:57:23 +11:00
|
|
|
* @rcu: RCU structure for freeing the table
|
2015-03-14 13:57:25 +11:00
|
|
|
* @future_tbl: Table under construction during rehashing
|
2017-02-11 19:26:47 +08:00
|
|
|
* @ntbl: Nested table used when out of memory.
|
2015-01-02 23:00:20 +01:00
|
|
|
* @buckets: size * hash buckets
|
|
|
|
*/
|
2014-08-02 11:47:44 +02:00
|
|
|
struct bucket_table {
|
2015-03-14 13:57:24 +11:00
|
|
|
unsigned int size;
|
2017-02-11 19:26:47 +08:00
|
|
|
unsigned int nest;
|
2015-03-10 09:27:55 +11:00
|
|
|
u32 hash_rnd;
|
2015-03-14 13:57:20 +11:00
|
|
|
struct list_head walkers;
|
2015-03-14 13:57:23 +11:00
|
|
|
struct rcu_head rcu;
|
2015-02-20 06:48:57 -08:00
|
|
|
|
2015-03-14 13:57:25 +11:00
|
|
|
struct bucket_table __rcu *future_tbl;
|
|
|
|
|
2019-04-02 10:07:45 +11:00
|
|
|
struct lockdep_map dep_map;
|
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
|
2014-08-02 11:47:44 +02:00
|
|
|
};
|
|
|
|
|
2018-11-30 10:26:50 +11:00
|
|
|
/*
|
|
|
|
* NULLS_MARKER() expects a hash value with the low
|
|
|
|
* bits most likely to be significant, and it discards
|
|
|
|
* the msb.
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
* We give it an address, in which the bottom bit is
|
2018-11-30 10:26:50 +11:00
|
|
|
* always 0, and the msb might be significant.
|
|
|
|
* So we shift the address down one bit to align with
|
|
|
|
* expectations and avoid losing a significant bit.
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
*
|
|
|
|
* We never store the NULLS_MARKER in the hash table
|
|
|
|
* itself as we need the lsb for locking.
|
|
|
|
* Instead we store a NULL
|
2018-11-30 10:26:50 +11:00
|
|
|
*/
|
|
|
|
/* Build the end-of-chain nulls marker from an address: the address is
 * shifted right by one bit before being passed to NULLS_MARKER().
 */
#define RHT_NULLS_MARKER(ptr)	\
	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
/* Initialise a bucket head: an empty bucket is stored as plain NULL. */
#define INIT_RHT_NULLS_HEAD(ptr)	\
	((ptr) = NULL)
|
2015-01-02 23:00:21 +01:00
|
|
|
|
|
|
|
/**
 * rht_is_a_nulls - test whether a chain pointer is a nulls marker
 * @ptr: pointer value to test
 *
 * Returns true when the least significant bit of @ptr is set, which is
 * how the end-of-chain nulls marker is distinguished from a real entry.
 */
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	return ((unsigned long) ptr & 1);
}
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
static inline void *rht_obj(const struct rhashtable *ht,
|
|
|
|
const struct rhash_head *he)
|
|
|
|
{
|
|
|
|
return (char *)he - ht->p.head_offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
|
|
|
|
unsigned int hash)
|
|
|
|
{
|
2018-06-18 12:52:50 +10:00
|
|
|
return hash & (tbl->size - 1);
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2017-12-04 10:31:43 -08:00
|
|
|
static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
|
|
|
|
const void *key, const struct rhashtable_params params,
|
|
|
|
unsigned int hash_rnd)
|
2015-03-20 21:57:00 +11:00
|
|
|
{
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int hash;
|
2015-03-24 00:50:20 +11:00
|
|
|
|
2015-03-24 00:50:21 +11:00
|
|
|
/* params must be equal to ht->p if it isn't constant. */
|
|
|
|
if (!__builtin_constant_p(params.key_len))
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
else if (params.key_len) {
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int key_len = params.key_len;
|
2015-03-24 00:50:21 +11:00
|
|
|
|
|
|
|
if (params.hashfn)
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = params.hashfn(key, key_len, hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
else if (key_len & (sizeof(u32) - 1))
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = jhash(key, key_len, hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
else
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
} else {
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int key_len = ht->p.key_len;
|
2015-03-24 00:50:21 +11:00
|
|
|
|
|
|
|
if (params.hashfn)
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = params.hashfn(key, key_len, hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
else
|
2017-12-04 10:31:43 -08:00
|
|
|
hash = jhash(key, key_len, hash_rnd);
|
2015-03-24 00:50:21 +11:00
|
|
|
}
|
|
|
|
|
2017-12-04 10:31:43 -08:00
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int rht_key_hashfn(
|
|
|
|
struct rhashtable *ht, const struct bucket_table *tbl,
|
|
|
|
const void *key, const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
|
|
|
|
|
2015-03-24 00:50:21 +11:00
|
|
|
return rht_bucket_index(tbl, hash);
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int rht_head_hashfn(
|
|
|
|
struct rhashtable *ht, const struct bucket_table *tbl,
|
|
|
|
const struct rhash_head *he, const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
const char *ptr = rht_obj(ht, he);
|
|
|
|
|
|
|
|
return likely(params.obj_hashfn) ?
|
2015-03-25 13:07:45 +00:00
|
|
|
rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
|
|
|
|
ht->p.key_len,
|
|
|
|
tbl->hash_rnd)) :
|
2015-03-20 21:57:00 +11:00
|
|
|
rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rht_grow_above_75 - returns true if nelems > 0.75 * table-size
|
|
|
|
* @ht: hash table
|
|
|
|
* @tbl: current table
|
|
|
|
*/
|
|
|
|
static inline bool rht_grow_above_75(const struct rhashtable *ht,
|
|
|
|
const struct bucket_table *tbl)
|
|
|
|
{
|
|
|
|
/* Expand table when exceeding 75% load */
|
|
|
|
return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
|
|
|
|
(!ht->p.max_size || tbl->size < ht->p.max_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
|
|
|
|
* @ht: hash table
|
|
|
|
* @tbl: current table
|
|
|
|
*/
|
|
|
|
static inline bool rht_shrink_below_30(const struct rhashtable *ht,
|
|
|
|
const struct bucket_table *tbl)
|
|
|
|
{
|
|
|
|
/* Shrink table beneath 30% load */
|
|
|
|
return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
|
|
|
|
tbl->size > ht->p.min_size;
|
|
|
|
}
|
|
|
|
|
2015-03-24 00:50:28 +11:00
|
|
|
/**
|
|
|
|
* rht_grow_above_100 - returns true if nelems > table-size
|
|
|
|
* @ht: hash table
|
|
|
|
* @tbl: current table
|
|
|
|
*/
|
|
|
|
static inline bool rht_grow_above_100(const struct rhashtable *ht,
|
|
|
|
const struct bucket_table *tbl)
|
|
|
|
{
|
2015-04-23 16:38:43 +02:00
|
|
|
return atomic_read(&ht->nelems) > tbl->size &&
|
|
|
|
(!ht->p.max_size || tbl->size < ht->p.max_size);
|
2015-03-24 00:50:28 +11:00
|
|
|
}
|
|
|
|
|
2015-05-15 11:30:47 +08:00
|
|
|
/**
|
|
|
|
* rht_grow_above_max - returns true if table is above maximum
|
|
|
|
* @ht: hash table
|
|
|
|
* @tbl: current table
|
|
|
|
*/
|
|
|
|
static inline bool rht_grow_above_max(const struct rhashtable *ht,
|
|
|
|
const struct bucket_table *tbl)
|
|
|
|
{
|
2017-04-27 13:44:51 +08:00
|
|
|
return atomic_read(&ht->nelems) >= ht->max_elems;
|
2015-05-15 11:30:47 +08:00
|
|
|
}
|
|
|
|
|
2014-08-02 11:47:44 +02:00
|
|
|
#ifdef CONFIG_PROVE_LOCKING
|
2015-01-02 23:00:20 +01:00
|
|
|
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
|
2015-01-02 23:00:16 +01:00
|
|
|
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
|
2014-08-02 11:47:44 +02:00
|
|
|
#else
|
2015-01-02 23:00:20 +01:00
|
|
|
static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
|
2014-08-02 11:47:44 +02:00
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
2015-01-02 23:00:16 +01:00
|
|
|
|
|
|
|
static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
|
|
|
|
u32 hash)
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
2014-08-02 11:47:44 +02:00
|
|
|
#endif /* CONFIG_PROVE_LOCKING */
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
|
|
|
|
struct rhash_head *obj);
|
2014-08-02 11:47:44 +02:00
|
|
|
|
2016-08-18 16:50:56 +08:00
|
|
|
void rhashtable_walk_enter(struct rhashtable *ht,
|
|
|
|
struct rhashtable_iter *iter);
|
2015-02-04 07:33:23 +11:00
|
|
|
void rhashtable_walk_exit(struct rhashtable_iter *iter);
|
2017-12-04 10:31:41 -08:00
|
|
|
int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
|
|
|
|
|
|
|
|
/**
 * rhashtable_walk_start - start a walk, ignoring the status code
 * @iter: hash table iterator
 *
 * Convenience wrapper around rhashtable_walk_start_check() for callers
 * that do not need its return value; the value is deliberately discarded.
 */
static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
{
	(void)rhashtable_walk_start_check(iter);
}
|
|
|
|
|
2015-02-04 07:33:23 +11:00
|
|
|
void *rhashtable_walk_next(struct rhashtable_iter *iter);
|
2017-12-04 10:31:42 -08:00
|
|
|
void *rhashtable_walk_peek(struct rhashtable_iter *iter);
|
2015-02-04 07:33:23 +11:00
|
|
|
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
|
|
|
|
|
2015-03-24 14:18:20 +01:00
|
|
|
void rhashtable_free_and_destroy(struct rhashtable *ht,
|
|
|
|
void (*free_fn)(void *ptr, void *arg),
|
|
|
|
void *arg);
|
2015-01-02 23:00:20 +01:00
|
|
|
void rhashtable_destroy(struct rhashtable *ht);
|
2014-08-02 11:47:44 +02:00
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
/*
 * Bucket lookups for nested bucket tables (defined out of line). The inline
 * rht_bucket*() helpers in this header dispatch to these when tbl->nest is
 * set.
 */
struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
					   unsigned int hash);
struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
					     unsigned int hash);
struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash);
|
2017-02-11 19:26:47 +08:00
|
|
|
|
2014-08-02 11:47:44 +02:00
|
|
|
/*
 * RCU dereference wrappers.
 *
 * The plain variants use rcu_dereference_protected(), the _rcu variants use
 * rcu_dereference_check(). The condition passed in is either "table mutex
 * held" (rht_dereference*) or "bucket lock held" (rht_dereference_bucket*).
 */
#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
|
|
|
|
|
|
|
|
/*
 * rht_entry - set @tpos to the object embedding @pos
 * @tpos:	the type * to assign
 * @pos:	pointer to the embedded list head
 * @member:	name of the embedded head within the containing struct
 *
 * Always evaluates to 1 so that it can be chained with && inside a loop
 * condition. @tpos is parenthesized so that a non-trivial expression can be
 * passed safely.
 */
#define rht_entry(tpos, pos, member) \
	({ (tpos) = container_of(pos, typeof(*(tpos)), member); 1; })
|
2014-08-02 11:47:44 +02:00
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
static inline struct rhash_lock_head *const *rht_bucket(
|
2017-02-11 19:26:47 +08:00
|
|
|
const struct bucket_table *tbl, unsigned int hash)
|
|
|
|
{
|
|
|
|
return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
|
|
|
|
&tbl->buckets[hash];
|
|
|
|
}
|
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
static inline struct rhash_lock_head **rht_bucket_var(
|
2017-02-11 19:26:47 +08:00
|
|
|
struct bucket_table *tbl, unsigned int hash)
|
|
|
|
{
|
2019-04-02 10:07:45 +11:00
|
|
|
return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
|
2017-02-11 19:26:47 +08:00
|
|
|
&tbl->buckets[hash];
|
|
|
|
}
|
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
static inline struct rhash_lock_head **rht_bucket_insert(
|
2017-02-11 19:26:47 +08:00
|
|
|
struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
|
|
|
|
{
|
|
|
|
return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
|
|
|
|
&tbl->buckets[hash];
|
|
|
|
}
|
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
/*
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We currently use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock bit), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
* We lock a bucket by setting BIT(0) in the pointer - this is always
|
|
|
|
* zero in real pointers. The NULLS mark is never stored in the bucket,
|
|
|
|
* rather we store NULL if the bucket is empty.
|
2019-04-12 11:52:08 +10:00
|
|
|
* bit_spin_locks do not handle contention well, but the whole point
|
|
|
|
* of the hashtable design is to achieve minimum per-bucket contention.
|
|
|
|
* A nested hash table might not have a bucket pointer. In that case
|
|
|
|
* we cannot get a lock. For remove and replace the bucket cannot be
|
|
|
|
* interesting and doesn't need locking.
|
|
|
|
* For insert we allocate the bucket if this is the last bucket_table,
|
|
|
|
* and then take the lock.
|
|
|
|
* Sometimes we unlock a bucket by writing a new pointer there. In that
|
|
|
|
* case we don't need to unlock, but we do need to reset state such as
|
|
|
|
* local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
|
|
|
|
* provides the same release semantics that bit_spin_unlock() provides,
|
|
|
|
* this is safe.
|
2019-04-12 11:52:08 +10:00
|
|
|
* When we write to a bucket without unlocking, we use rht_assign_locked().
|
2019-04-12 11:52:08 +10:00
|
|
|
*/
|
|
|
|
|
|
|
|
static inline void rht_lock(struct bucket_table *tbl,
|
|
|
|
struct rhash_lock_head **bkt)
|
|
|
|
{
|
|
|
|
local_bh_disable();
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
bit_spin_lock(0, (unsigned long *)bkt);
|
2019-04-12 11:52:08 +10:00
|
|
|
lock_map_acquire(&tbl->dep_map);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void rht_lock_nested(struct bucket_table *tbl,
|
|
|
|
struct rhash_lock_head **bucket,
|
|
|
|
unsigned int subclass)
|
|
|
|
{
|
|
|
|
local_bh_disable();
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
bit_spin_lock(0, (unsigned long *)bucket);
|
2019-04-12 11:52:08 +10:00
|
|
|
lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void rht_unlock(struct bucket_table *tbl,
|
|
|
|
struct rhash_lock_head **bkt)
|
|
|
|
{
|
|
|
|
lock_map_release(&tbl->dep_map);
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
bit_spin_unlock(0, (unsigned long *)bkt);
|
2019-04-12 11:52:08 +10:00
|
|
|
local_bh_enable();
|
|
|
|
}
|
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
/*
 * __rht_ptr - read a bucket head, hiding the lock bit
 * @bkt:	the bucket slot to read
 *
 * Returns the stored pointer with bit 0 (the lock bit) cleared, or the
 * NULLS marker for @bkt when the bucket is empty (stored as NULL).
 *
 * The slot can be modified concurrently (lock bit toggling, head updates),
 * so it is loaded exactly once with READ_ONCE(). A plain "*bkt" feeding the
 * "?:" operator would allow the compiler to load the word twice, and the
 * second load could observe an emptied bucket and yield 0 rather than the
 * NULLS marker.
 */
static inline struct rhash_head __rcu *__rht_ptr(
	struct rhash_lock_head *const *bkt)
{
	unsigned long head = (unsigned long)READ_ONCE(*bkt) & ~BIT(0);

	if (!head)
		head = (unsigned long)RHT_NULLS_MARKER(bkt);

	return (struct rhash_head __rcu *)head;
}
|
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
/*
|
2019-04-12 11:52:08 +10:00
|
|
|
* Where 'bkt' is a bucket and might be locked:
|
2019-05-28 15:02:31 +08:00
|
|
|
* rht_ptr_rcu() dereferences that pointer and clears the lock bit.
|
|
|
|
* rht_ptr() dereferences in a context where the bucket is locked.
|
2019-04-12 11:52:08 +10:00
|
|
|
* rht_ptr_exclusive() dereferences in a context where exclusive
|
|
|
|
* access is guaranteed, such as when destroying the table.
|
2019-04-12 11:52:08 +10:00
|
|
|
*/
|
2019-05-28 15:02:31 +08:00
|
|
|
static inline struct rhash_head *rht_ptr_rcu(
|
|
|
|
struct rhash_lock_head *const *bkt)
|
|
|
|
{
|
|
|
|
struct rhash_head __rcu *p = __rht_ptr(bkt);
|
|
|
|
|
|
|
|
return rcu_dereference(p);
|
|
|
|
}
|
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
static inline struct rhash_head *rht_ptr(
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head *const *bkt,
|
2019-04-12 11:52:08 +10:00
|
|
|
struct bucket_table *tbl,
|
|
|
|
unsigned int hash)
|
2019-04-12 11:52:08 +10:00
|
|
|
{
|
2019-05-28 15:02:31 +08:00
|
|
|
return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
|
2019-04-12 11:52:08 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct rhash_head *rht_ptr_exclusive(
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head *const *bkt)
|
2019-04-12 11:52:08 +10:00
|
|
|
{
|
2019-05-28 15:02:31 +08:00
|
|
|
return rcu_dereference_protected(__rht_ptr(bkt), 1);
|
2019-04-12 11:52:08 +10:00
|
|
|
}
|
|
|
|
|
2019-05-16 15:19:46 +08:00
|
|
|
static inline void rht_assign_locked(struct rhash_lock_head **bkt,
|
2019-04-12 11:52:08 +10:00
|
|
|
struct rhash_head *obj)
|
2019-04-12 11:52:08 +10:00
|
|
|
{
|
2019-04-12 11:52:08 +10:00
|
|
|
struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
|
|
|
|
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
if (rht_is_a_nulls(obj))
|
|
|
|
obj = NULL;
|
|
|
|
rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
|
2019-04-12 11:52:08 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void rht_assign_unlock(struct bucket_table *tbl,
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head **bkt,
|
2019-04-12 11:52:08 +10:00
|
|
|
struct rhash_head *obj)
|
|
|
|
{
|
|
|
|
struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
|
|
|
|
|
rhashtable: use BIT(0) for locking.
As reported by Guenter Roeck, the new bit-locking using
BIT(1) doesn't work on the m68k architecture. m68k only requires
2-byte alignment for words and longwords, so there is only one
unused bit in pointers to structs - We current use two, one for the
NULLS marker at the end of the linked list, and one for the bit-lock
in the head of the list.
The two uses don't need to conflict as we never need the head of the
list to be a NULLS marker - the marker is only needed to check if an
object has moved to a different table, and the bucket head cannot
move. The NULLS marker is only needed in a ->next pointer.
As we already have different types for the bucket head pointer (struct
rhash_lock_head) and the ->next pointers (struct rhash_head), it is
fairly easy to treat the lsb differently in each.
So: Initialize buckets heads to NULL, and use the lsb for locking.
When loading the pointer from the bucket head, if it is NULL (ignoring
the lock big), report as being the expected NULLS marker.
When storing a value into a bucket head, if it is a NULLS marker,
store NULL instead.
And convert all places that used bit 1 for locking, to use bit 0.
Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-12 11:52:08 +10:00
|
|
|
if (rht_is_a_nulls(obj))
|
|
|
|
obj = NULL;
|
2019-04-12 11:52:08 +10:00
|
|
|
lock_map_release(&tbl->dep_map);
|
|
|
|
rcu_assign_pointer(*p, obj);
|
|
|
|
preempt_enable();
|
|
|
|
__release(bitlock);
|
|
|
|
local_bh_enable();
|
|
|
|
}
|
|
|
|
|
2014-08-02 11:47:44 +02:00
|
|
|
/**
 * rht_for_each_from - walk a hash chain starting at a given entry
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * The walk ends when the cursor becomes a NULLS marker. Each step goes
 * through rht_dereference_bucket().
 */
#define rht_for_each_from(pos, head, tbl, hash)				\
	for (pos = head;						\
	     !rht_is_a_nulls(pos);					\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
|
|
|
|
|
|
|
|
/**
 * rht_for_each - walk the hash chain of one bucket
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * Starts from the head obtained with rht_ptr() for bucket @hash of @tbl.
 */
#define rht_for_each(pos, tbl, hash)					\
	rht_for_each_from(pos,						\
			  rht_ptr(rht_bucket(tbl, hash), tbl, hash),	\
			  tbl, hash)
|
2015-01-02 23:00:16 +01:00
|
|
|
|
|
|
|
/**
 * rht_for_each_entry_from - walk typed entries of a chain from a given entry
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * @tpos is refreshed from @pos via rht_entry() before every iteration.
 */
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member)	\
	for (pos = head;						\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
|
2014-08-02 11:47:44 +02:00
|
|
|
|
|
|
|
/**
 * rht_for_each_entry - walk typed entries of one bucket's chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * Starts from the head obtained with rht_ptr() for bucket @hash of @tbl.
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_from(tpos, pos,				\
				rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
				tbl, hash, member)
|
2014-08-02 11:47:44 +02:00
|
|
|
|
|
|
|
/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @nxt:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list.
 *
 * The look-ahead parameter is deliberately not called "next": a macro
 * parameter of that name would also be substituted into the "->next" member
 * access, breaking every caller whose variable is not literally named
 * "next".
 */
#define rht_for_each_entry_safe(tpos, pos, nxt, tbl, hash, member)	      \
	for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash),		      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL; \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = nxt,							      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL)
|
2015-01-02 23:00:16 +01:00
|
|
|
|
|
|
|
/**
 * rht_for_each_rcu_from - iterate over rcu hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * @tbl and @hash are accepted for symmetry with the other iterators; the
 * expansion itself does not use them.
 */
#define rht_for_each_rcu_from(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = head;						\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
|
2014-08-02 11:47:44 +02:00
|
|
|
|
|
|
|
/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * The chain head is read with rht_ptr_rcu(), which hides the bucket lock bit.
 */
#define rht_for_each_rcu(pos, tbl, hash)				\
	for (({barrier(); }),						\
	     pos = rht_ptr_rcu(rht_bucket(tbl, hash));			\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
|
2015-01-02 23:00:16 +01:00
|
|
|
|
|
|
|
/**
 * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = head;						    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu((pos)->next, tbl, hash))
|
2014-08-02 11:47:44 +02:00
|
|
|
|
|
|
|
/**
 * rht_for_each_entry_rcu - walk typed entries of one bucket's rcu hash chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * Starts from the head obtained with rht_ptr_rcu() for bucket @hash.
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
	rht_for_each_entry_rcu_from(tpos, pos,				   \
				    rht_ptr_rcu(rht_bucket(tbl, hash)),	   \
				    tbl, hash, member)
|
2014-08-02 11:47:44 +02:00
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/**
 * rhl_for_each_rcu - iterate over rcu hash table list
 * @pos:	the &struct rlist_head to use as a loop cursor.
 * @list:	the head of the list
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 *
 * The walk stops when the cursor becomes NULL.
 */
#define rhl_for_each_rcu(pos, list)					\
	for (pos = list; pos; pos = rcu_dereference_raw((pos)->next))
|
|
|
|
|
|
|
|
/**
 * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rlist_head to use as a loop cursor.
 * @list:	the head of the list
 * @member:	name of the &struct rlist_head within the hashable struct.
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 *
 * The walk stops when the cursor becomes NULL; @tpos is refreshed from @pos
 * via rht_entry() before every iteration.
 */
#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
	for (pos = list; pos && rht_entry(tpos, pos, member);		\
	     pos = rcu_dereference_raw((pos)->next))
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
|
|
|
|
const void *obj)
|
|
|
|
{
|
|
|
|
struct rhashtable *ht = arg->ht;
|
|
|
|
const char *ptr = obj;
|
|
|
|
|
|
|
|
return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
|
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/* Internal function, do not use. */
|
|
|
|
static inline struct rhash_head *__rhashtable_lookup(
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhashtable *ht, const void *key,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
struct rhashtable_compare_arg arg = {
|
|
|
|
.ht = ht,
|
|
|
|
.key = key,
|
|
|
|
};
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head *const *bkt;
|
2017-02-11 19:26:47 +08:00
|
|
|
struct bucket_table *tbl;
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhash_head *he;
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int hash;
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
tbl = rht_dereference_rcu(ht->tbl, ht);
|
|
|
|
restart:
|
|
|
|
hash = rht_key_hashfn(ht, tbl, key, params);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
bkt = rht_bucket(tbl, hash);
|
2018-11-30 10:26:50 +11:00
|
|
|
do {
|
2019-05-28 15:02:31 +08:00
|
|
|
rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
|
2018-11-30 10:26:50 +11:00
|
|
|
if (params.obj_cmpfn ?
|
|
|
|
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
|
|
|
|
rhashtable_compare(&arg, rht_obj(ht, he)))
|
|
|
|
continue;
|
|
|
|
return he;
|
|
|
|
}
|
|
|
|
/* An object might have been moved to a different hash chain,
|
|
|
|
* while we walk along it - better check and retry.
|
|
|
|
*/
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
} while (he != RHT_NULLS_MARKER(bkt));
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
/* Ensure we see any new tables. */
|
|
|
|
smp_rmb();
|
|
|
|
|
|
|
|
tbl = rht_dereference_rcu(tbl->future_tbl, ht);
|
|
|
|
if (unlikely(tbl))
|
|
|
|
goto restart;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/**
|
|
|
|
* rhashtable_lookup - search hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @key: the pointer to the key
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Computes the hash value for the key and traverses the bucket chain looking
|
|
|
|
* for a entry with an identical key. The first matching entry is returned.
|
|
|
|
*
|
|
|
|
* This must only be called under the RCU read lock.
|
|
|
|
*
|
|
|
|
* Returns the first entry on which the compare function returned true.
|
|
|
|
*/
|
|
|
|
static inline void *rhashtable_lookup(
|
|
|
|
struct rhashtable *ht, const void *key,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
struct rhash_head *he = __rhashtable_lookup(ht, key, params);
|
|
|
|
|
|
|
|
return he ? rht_obj(ht, he) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhashtable_lookup_fast - search hash table, without RCU read lock
|
|
|
|
* @ht: hash table
|
|
|
|
* @key: the pointer to the key
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Computes the hash value for the key and traverses the bucket chain looking
|
|
|
|
* for a entry with an identical key. The first matching entry is returned.
|
|
|
|
*
|
|
|
|
* Only use this function when you have other mechanisms guaranteeing
|
|
|
|
* that the object won't go away after the RCU read lock is released.
|
|
|
|
*
|
|
|
|
* Returns the first entry on which the compare function returned true.
|
|
|
|
*/
|
|
|
|
static inline void *rhashtable_lookup_fast(
|
|
|
|
struct rhashtable *ht, const void *key,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
void *obj;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
obj = rhashtable_lookup(ht, key, params);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhltable_lookup - search hash list table
|
|
|
|
* @hlt: hash table
|
|
|
|
* @key: the pointer to the key
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Computes the hash value for the key and traverses the bucket chain looking
|
|
|
|
* for a entry with an identical key. All matching entries are returned
|
|
|
|
* in a list.
|
|
|
|
*
|
|
|
|
* This must only be called under the RCU read lock.
|
|
|
|
*
|
|
|
|
* Returns the list of entries that match the given key.
|
|
|
|
*/
|
|
|
|
static inline struct rhlist_head *rhltable_lookup(
|
|
|
|
struct rhltable *hlt, const void *key,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
|
|
|
|
|
|
|
|
return he ? container_of(he, struct rhlist_head, rhead) : NULL;
|
|
|
|
}
|
|
|
|
|
2016-08-24 12:31:31 +02:00
|
|
|
/* Internal function, please use rhashtable_insert_fast() instead. This
|
|
|
|
* function returns the element already in the hash table if there is a clash,
|
|
|
|
* otherwise it returns an error via ERR_PTR().
|
|
|
|
*/
|
|
|
|
static inline void *__rhashtable_insert_fast(
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhashtable *ht, const void *key, struct rhash_head *obj,
|
2016-09-19 19:00:09 +08:00
|
|
|
const struct rhashtable_params params, bool rhlist)
|
2015-03-20 21:57:00 +11:00
|
|
|
{
|
|
|
|
struct rhashtable_compare_arg arg = {
|
|
|
|
.ht = ht,
|
|
|
|
.key = key,
|
|
|
|
};
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head **bkt;
|
2016-09-19 19:00:09 +08:00
|
|
|
struct rhash_head __rcu **pprev;
|
|
|
|
struct bucket_table *tbl;
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhash_head *head;
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int hash;
|
2016-09-19 19:00:09 +08:00
|
|
|
int elasticity;
|
|
|
|
void *data;
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
tbl = rht_dereference_rcu(ht->tbl, ht);
|
2016-09-19 19:00:09 +08:00
|
|
|
hash = rht_head_hashfn(ht, tbl, obj, params);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
elasticity = RHT_ELASTICITY;
|
|
|
|
bkt = rht_bucket_insert(ht, tbl, hash);
|
|
|
|
data = ERR_PTR(-ENOMEM);
|
|
|
|
if (!bkt)
|
|
|
|
goto out;
|
|
|
|
pprev = NULL;
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_lock(tbl, bkt);
|
2015-03-20 21:57:00 +11:00
|
|
|
|
2018-06-18 12:52:50 +10:00
|
|
|
if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
|
2016-09-19 19:00:09 +08:00
|
|
|
slow_path:
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
2016-09-19 19:00:09 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
return rhashtable_insert_slow(ht, key, obj);
|
2015-03-24 00:50:26 +11:00
|
|
|
}
|
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
|
2016-09-19 19:00:09 +08:00
|
|
|
struct rhlist_head *plist;
|
|
|
|
struct rhlist_head *list;
|
|
|
|
|
|
|
|
elasticity--;
|
|
|
|
if (!key ||
|
|
|
|
(params.obj_cmpfn ?
|
|
|
|
params.obj_cmpfn(&arg, rht_obj(ht, head)) :
|
2018-03-04 17:29:48 +02:00
|
|
|
rhashtable_compare(&arg, rht_obj(ht, head)))) {
|
|
|
|
pprev = &head->next;
|
2016-09-19 19:00:09 +08:00
|
|
|
continue;
|
2018-03-04 17:29:48 +02:00
|
|
|
}
|
2016-09-19 19:00:09 +08:00
|
|
|
|
|
|
|
data = rht_obj(ht, head);
|
2015-12-03 20:41:29 +08:00
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
if (!rhlist)
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
goto out_unlock;
|
2016-08-24 12:31:31 +02:00
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
list = container_of(obj, struct rhlist_head, rhead);
|
|
|
|
plist = container_of(head, struct rhlist_head, rhead);
|
2015-05-15 11:30:47 +08:00
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
RCU_INIT_POINTER(list->next, plist);
|
|
|
|
head = rht_dereference_bucket(head->next, tbl, hash);
|
|
|
|
RCU_INIT_POINTER(list->rhead.next, head);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
if (pprev) {
|
|
|
|
rcu_assign_pointer(*pprev, obj);
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
} else
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_assign_unlock(tbl, bkt, obj);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
data = NULL;
|
|
|
|
goto out;
|
2015-03-24 00:50:28 +11:00
|
|
|
}
|
2015-03-20 21:57:00 +11:00
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
if (elasticity <= 0)
|
|
|
|
goto slow_path;
|
|
|
|
|
|
|
|
data = ERR_PTR(-E2BIG);
|
|
|
|
if (unlikely(rht_grow_above_max(ht, tbl)))
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
goto out_unlock;
|
2016-09-19 19:00:09 +08:00
|
|
|
|
|
|
|
if (unlikely(rht_grow_above_100(ht, tbl)))
|
|
|
|
goto slow_path;
|
2015-03-20 21:57:00 +11:00
|
|
|
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
/* Inserting at head of list makes unlocking free. */
|
2019-04-12 11:52:08 +10:00
|
|
|
head = rht_ptr(bkt, tbl, hash);
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
RCU_INIT_POINTER(obj->next, head);
|
2016-09-19 19:00:09 +08:00
|
|
|
if (rhlist) {
|
|
|
|
struct rhlist_head *list;
|
|
|
|
|
|
|
|
list = container_of(obj, struct rhlist_head, rhead);
|
|
|
|
RCU_INIT_POINTER(list->next, NULL);
|
|
|
|
}
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
atomic_inc(&ht->nelems);
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_assign_unlock(tbl, bkt, obj);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
if (rht_grow_above_75(ht, tbl))
|
|
|
|
schedule_work(&ht->run_work);
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
data = NULL;
|
2015-03-20 21:57:00 +11:00
|
|
|
out:
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
return data;
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
|
|
|
|
out_unlock:
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
goto out;
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhashtable_insert_fast - insert object into hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* Will take the per bucket bitlock to protect against mutual mutations
|
2015-03-20 21:57:00 +11:00
|
|
|
* on the same bucket. Multiple insertions may occur in parallel unless
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* they map to the same bucket.
|
2015-03-20 21:57:00 +11:00
|
|
|
*
|
|
|
|
* It is safe to call this function from atomic context.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will trigger an automatic deferred table resizing if residency in the
|
|
|
|
* table grows beyond 75%.
|
2015-03-20 21:57:00 +11:00
|
|
|
*/
|
|
|
|
static inline int rhashtable_insert_fast(
|
|
|
|
struct rhashtable *ht, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
2016-08-24 12:31:31 +02:00
|
|
|
void *ret;
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
|
2016-08-24 12:31:31 +02:00
|
|
|
if (IS_ERR(ret))
|
|
|
|
return PTR_ERR(ret);
|
|
|
|
|
|
|
|
return ret == NULL ? 0 : -EEXIST;
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/**
|
|
|
|
* rhltable_insert_key - insert object into hash list table
|
|
|
|
* @hlt: hash list table
|
|
|
|
* @key: the pointer to the key
|
|
|
|
* @list: pointer to hash list head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* Will take the per bucket bitlock to protect against mutual mutations
|
2016-09-19 19:00:09 +08:00
|
|
|
* on the same bucket. Multiple insertions may occur in parallel unless
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* they map to the same bucket.
|
2016-09-19 19:00:09 +08:00
|
|
|
*
|
|
|
|
* It is safe to call this function from atomic context.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will trigger an automatic deferred table resizing if residency in the
|
|
|
|
* table grows beyond 75%.
|
2016-09-19 19:00:09 +08:00
|
|
|
*/
|
|
|
|
static inline int rhltable_insert_key(
|
|
|
|
struct rhltable *hlt, const void *key, struct rhlist_head *list,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
|
|
|
|
params, true));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhltable_insert - insert object into hash list table
|
|
|
|
* @hlt: hash list table
|
|
|
|
* @list: pointer to hash list head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* Will take the per bucket bitlock to protect against mutual mutations
|
2016-09-19 19:00:09 +08:00
|
|
|
* on the same bucket. Multiple insertions may occur in parallel unless
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
* they map to the same bucket.
|
2016-09-19 19:00:09 +08:00
|
|
|
*
|
|
|
|
* It is safe to call this function from atomic context.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will trigger an automatic deferred table resizing if residency in the
|
|
|
|
* table grows beyond 75%.
|
2016-09-19 19:00:09 +08:00
|
|
|
*/
|
|
|
|
static inline int rhltable_insert(
|
|
|
|
struct rhltable *hlt, struct rhlist_head *list,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
const char *key = rht_obj(&hlt->ht, &list->rhead);
|
|
|
|
|
|
|
|
key += params.key_offset;
|
|
|
|
|
|
|
|
return rhltable_insert_key(hlt, key, list, params);
|
|
|
|
}
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
/**
|
|
|
|
* rhashtable_lookup_insert_fast - lookup and insert object into hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* This lookup function may only be used for fixed key hash table (key_len
|
|
|
|
* parameter set). It will BUG() if used inappropriately.
|
|
|
|
*
|
|
|
|
* It is safe to call this function from atomic context.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will trigger an automatic deferred table resizing if residency in the
|
|
|
|
* table grows beyond 75%.
|
2015-03-20 21:57:00 +11:00
|
|
|
*/
|
|
|
|
static inline int rhashtable_lookup_insert_fast(
|
|
|
|
struct rhashtable *ht, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
const char *key = rht_obj(ht, obj);
|
2016-08-24 12:31:31 +02:00
|
|
|
void *ret;
|
2015-03-20 21:57:00 +11:00
|
|
|
|
|
|
|
BUG_ON(ht->p.obj_hashfn);
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
|
|
|
|
false);
|
2016-08-24 12:31:31 +02:00
|
|
|
if (IS_ERR(ret))
|
|
|
|
return PTR_ERR(ret);
|
|
|
|
|
|
|
|
return ret == NULL ? 0 : -EEXIST;
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2017-03-18 00:36:15 +01:00
|
|
|
/**
|
|
|
|
* rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Just like rhashtable_lookup_insert_fast(), but this function returns the
|
|
|
|
* object if it exists, NULL if it did not and the insertion was successful,
|
|
|
|
* and an ERR_PTR otherwise.
|
|
|
|
*/
|
|
|
|
static inline void *rhashtable_lookup_get_insert_fast(
|
|
|
|
struct rhashtable *ht, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
const char *key = rht_obj(ht, obj);
|
|
|
|
|
|
|
|
BUG_ON(ht->p.obj_hashfn);
|
|
|
|
|
|
|
|
return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
|
|
|
|
false);
|
|
|
|
}
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
/**
|
|
|
|
* rhashtable_lookup_insert_key - search and insert object to hash table
|
|
|
|
* with explicit key
|
|
|
|
* @ht: hash table
|
|
|
|
* @key: key
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Lookups may occur in parallel with hashtable mutations and resizing.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will trigger an automatic deferred table resizing if residency in the
|
|
|
|
* table grows beyond 75%.
|
2015-03-20 21:57:00 +11:00
|
|
|
*
|
|
|
|
* Returns zero on success.
|
|
|
|
*/
|
|
|
|
static inline int rhashtable_lookup_insert_key(
|
|
|
|
struct rhashtable *ht, const void *key, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
2016-08-24 12:31:31 +02:00
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
BUG_ON(!ht->p.obj_hashfn || !key);
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
ret = __rhashtable_insert_fast(ht, key, obj, params, false);
|
2016-08-24 12:31:31 +02:00
|
|
|
if (IS_ERR(ret))
|
|
|
|
return PTR_ERR(ret);
|
|
|
|
|
|
|
|
return ret == NULL ? 0 : -EEXIST;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhashtable_lookup_get_insert_key - lookup and insert object into hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
* @key: key to look up and insert under
|
|
|
|
*
|
|
|
|
* Just like rhashtable_lookup_insert_key(), but this function returns the
|
|
|
|
* object if it exists, NULL if it does not and the insertion was successful,
|
|
|
|
* and an ERR_PTR otherwise.
|
|
|
|
*/
|
|
|
|
static inline void *rhashtable_lookup_get_insert_key(
|
|
|
|
struct rhashtable *ht, const void *key, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
2015-03-20 21:57:00 +11:00
|
|
|
{
|
|
|
|
BUG_ON(!ht->p.obj_hashfn || !key);
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
return __rhashtable_insert_fast(ht, key, obj, params, false);
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2015-03-24 14:18:18 +01:00
|
|
|
/* Internal function, please use rhashtable_remove_fast() instead */
|
2016-09-19 19:00:09 +08:00
|
|
|
static inline int __rhashtable_remove_fast_one(
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhashtable *ht, struct bucket_table *tbl,
|
2016-09-19 19:00:09 +08:00
|
|
|
struct rhash_head *obj, const struct rhashtable_params params,
|
|
|
|
bool rhlist)
|
2015-03-20 21:57:00 +11:00
|
|
|
{
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head **bkt;
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhash_head __rcu **pprev;
|
|
|
|
struct rhash_head *he;
|
2015-03-24 14:18:17 +01:00
|
|
|
unsigned int hash;
|
2015-03-20 21:57:00 +11:00
|
|
|
int err = -ENOENT;
|
|
|
|
|
|
|
|
hash = rht_head_hashfn(ht, tbl, obj, params);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
bkt = rht_bucket_var(tbl, hash);
|
|
|
|
if (!bkt)
|
|
|
|
return -ENOENT;
|
|
|
|
pprev = NULL;
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_lock(tbl, bkt);
|
2015-03-20 21:57:00 +11:00
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
|
2016-09-19 19:00:09 +08:00
|
|
|
struct rhlist_head *list;
|
|
|
|
|
|
|
|
list = container_of(he, struct rhlist_head, rhead);
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
if (he != obj) {
|
2016-09-19 19:00:09 +08:00
|
|
|
struct rhlist_head __rcu **lpprev;
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
pprev = &he->next;
|
2016-09-19 19:00:09 +08:00
|
|
|
|
|
|
|
if (!rhlist)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
do {
|
|
|
|
lpprev = &list->next;
|
|
|
|
list = rht_dereference_bucket(list->next,
|
|
|
|
tbl, hash);
|
|
|
|
} while (list && obj != &list->rhead);
|
|
|
|
|
|
|
|
if (!list)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
list = rht_dereference_bucket(list->next, tbl, hash);
|
|
|
|
RCU_INIT_POINTER(*lpprev, list);
|
|
|
|
err = 0;
|
|
|
|
break;
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
obj = rht_dereference_bucket(obj->next, tbl, hash);
|
|
|
|
err = 1;
|
|
|
|
|
|
|
|
if (rhlist) {
|
|
|
|
list = rht_dereference_bucket(list->next, tbl, hash);
|
|
|
|
if (list) {
|
|
|
|
RCU_INIT_POINTER(list->rhead.next, obj);
|
|
|
|
obj = &list->rhead;
|
|
|
|
err = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
if (pprev) {
|
|
|
|
rcu_assign_pointer(*pprev, obj);
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'bkt' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
} else {
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_assign_unlock(tbl, bkt, obj);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when locking costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend for the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'bkt' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
}
|
|
|
|
goto unlocked;
|
2015-03-20 21:57:00 +11:00
|
|
|
}
|
|
|
|
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when locking costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend for the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'bkt' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
unlocked:
|
2016-09-19 19:00:09 +08:00
|
|
|
if (err > 0) {
|
|
|
|
atomic_dec(&ht->nelems);
|
|
|
|
if (unlikely(ht->p.automatic_shrinking &&
|
|
|
|
rht_shrink_below_30(ht, tbl)))
|
|
|
|
schedule_work(&ht->run_work);
|
|
|
|
err = 0;
|
|
|
|
}
|
|
|
|
|
2015-03-20 21:57:00 +11:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/* Internal function, please use rhashtable_remove_fast() instead */
|
|
|
|
static inline int __rhashtable_remove_fast(
|
2015-03-20 21:57:00 +11:00
|
|
|
struct rhashtable *ht, struct rhash_head *obj,
|
2016-09-19 19:00:09 +08:00
|
|
|
const struct rhashtable_params params, bool rhlist)
|
2015-03-20 21:57:00 +11:00
|
|
|
{
|
|
|
|
struct bucket_table *tbl;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
tbl = rht_dereference_rcu(ht->tbl, ht);
|
|
|
|
|
|
|
|
/* Because we have already taken (and released) the bucket
|
|
|
|
* lock in old_tbl, if we find that future_tbl is not yet
|
|
|
|
* visible then that guarantees the entry to still be in
|
|
|
|
* the old tbl if it exists.
|
|
|
|
*/
|
2016-09-19 19:00:09 +08:00
|
|
|
while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
|
|
|
|
rhlist)) &&
|
2015-03-20 21:57:00 +11:00
|
|
|
(tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
|
|
|
|
;
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/**
|
|
|
|
* rhashtable_remove_fast - remove object from hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj: pointer to hash head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Since the hash chain is single linked, the removal operation needs to
|
|
|
|
* walk the bucket chain upon removal. The removal operation is thus
|
|
|
|
* considerable slow if the hash table is not correctly sized.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will automatically shrink the table if permitted when residency drops
|
|
|
|
* below 30%.
|
2016-09-19 19:00:09 +08:00
|
|
|
*
|
|
|
|
* Returns zero on success, -ENOENT if the entry could not be found.
|
|
|
|
*/
|
|
|
|
static inline int rhashtable_remove_fast(
|
|
|
|
struct rhashtable *ht, struct rhash_head *obj,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
return __rhashtable_remove_fast(ht, obj, params, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhltable_remove - remove object from hash list table
|
|
|
|
* @hlt: hash list table
|
|
|
|
* @list: pointer to hash list head inside object
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Since the hash chain is single linked, the removal operation needs to
|
|
|
|
* walk the bucket chain upon removal. The removal operation is thus
|
|
|
|
* considerable slow if the hash table is not correctly sized.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* Will automatically shrink the table if permitted when residency drops
|
|
|
|
* below 30%
|
2016-09-19 19:00:09 +08:00
|
|
|
*
|
|
|
|
* Returns zero on success, -ENOENT if the entry could not be found.
|
|
|
|
*/
|
|
|
|
static inline int rhltable_remove(
|
|
|
|
struct rhltable *hlt, struct rhlist_head *list,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
|
|
|
|
}
|
|
|
|
|
2015-12-15 15:41:36 -08:00
|
|
|
/* Internal function, please use rhashtable_replace_fast() instead */
|
|
|
|
static inline int __rhashtable_replace_fast(
|
|
|
|
struct rhashtable *ht, struct bucket_table *tbl,
|
|
|
|
struct rhash_head *obj_old, struct rhash_head *obj_new,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
2019-05-16 15:19:46 +08:00
|
|
|
struct rhash_lock_head **bkt;
|
2015-12-15 15:41:36 -08:00
|
|
|
struct rhash_head __rcu **pprev;
|
|
|
|
struct rhash_head *he;
|
|
|
|
unsigned int hash;
|
|
|
|
int err = -ENOENT;
|
|
|
|
|
|
|
|
/* Minimally, the old and new objects must have same hash
|
|
|
|
* (which should mean identifiers are the same).
|
|
|
|
*/
|
|
|
|
hash = rht_head_hashfn(ht, tbl, obj_old, params);
|
|
|
|
if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
|
|
|
|
return -EINVAL;
|
|
|
|
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
bkt = rht_bucket_var(tbl, hash);
|
|
|
|
if (!bkt)
|
|
|
|
return -ENOENT;
|
2015-12-15 15:41:36 -08:00
|
|
|
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
pprev = NULL;
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_lock(tbl, bkt);
|
2015-12-15 15:41:36 -08:00
|
|
|
|
2019-04-12 11:52:08 +10:00
|
|
|
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
|
2015-12-15 15:41:36 -08:00
|
|
|
if (he != obj_old) {
|
|
|
|
pprev = &he->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
rcu_assign_pointer(obj_new->next, obj_old->next);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
if (pprev) {
|
|
|
|
rcu_assign_pointer(*pprev, obj_new);
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
} else {
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_assign_unlock(tbl, bkt, obj_new);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
}
|
2015-12-15 15:41:36 -08:00
|
|
|
err = 0;
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
goto unlocked;
|
2015-12-15 15:41:36 -08:00
|
|
|
}
|
|
|
|
|
2019-04-02 10:07:45 +11:00
|
|
|
rht_unlock(tbl, bkt);
|
rhashtable: use bit_spin_locks to protect hash bucket.
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the
bucket pointer to lock the hash chain for that bucket.
The benefits of a bit spin_lock are:
- no need to allocate a separate array of locks.
- no need to have a configuration option to guide the
choice of the size of this array
- locking cost is often a single test-and-set in a cache line
that will have to be loaded anyway. When inserting at, or removing
from, the head of the chain, the unlock is free - writing the new
address in the bucket head implicitly clears the lock bit.
For __rhashtable_insert_fast() we ensure this always happens
when adding a new key.
- even when lockings costs 2 updates (lock and unlock), they are
in a cacheline that needs to be read anyway.
The cost of using a bit spin_lock is a little bit of code complexity,
which I think is quite manageable.
Bit spin_locks are sometimes inappropriate because they are not fair -
if multiple CPUs repeatedly contend of the same lock, one CPU can
easily be starved. This is not a credible situation with rhashtable.
Multiple CPUs may want to repeatedly add or remove objects, but they
will typically do so at different buckets, so they will attempt to
acquire different locks.
As we have more bit-locks than we previously had spinlocks (by at
least a factor of two) we can expect slightly less contention to
go with the slightly better cache behavior and reduced memory
consumption.
To enhance type checking, a new struct is introduced to represent the
pointer plus lock-bit
that is stored in the bucket-table. This is "struct rhash_lock_head"
and is empty. A pointer to this needs to be cast to either an
unsigned lock, or a "struct rhash_head *" to be useful.
Variables of this type are most often called "bkt".
Previously "pprev" would sometimes point to a bucket, and sometimes a
->next pointer in an rhash_head. As these are now different types,
pprev is NULL when it would have pointed to the bucket. In that case,
'blk' is used, together with correct locking protocol.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-02 10:07:45 +11:00
|
|
|
|
|
|
|
unlocked:
|
2015-12-15 15:41:36 -08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhashtable_replace_fast - replace an object in hash table
|
|
|
|
* @ht: hash table
|
|
|
|
* @obj_old: pointer to hash head inside object being replaced
|
|
|
|
* @obj_new: pointer to hash head inside object which is new
|
|
|
|
* @params: hash table parameters
|
|
|
|
*
|
|
|
|
* Replacing an object doesn't affect the number of elements in the hash table
|
|
|
|
* or bucket, so we don't need to worry about shrinking or expanding the
|
|
|
|
* table here.
|
|
|
|
*
|
|
|
|
* Returns zero on success, -ENOENT if the entry could not be found,
|
|
|
|
* -EINVAL if hash is not the same for the old and new objects.
|
|
|
|
*/
|
|
|
|
static inline int rhashtable_replace_fast(
|
|
|
|
struct rhashtable *ht, struct rhash_head *obj_old,
|
|
|
|
struct rhash_head *obj_new,
|
|
|
|
const struct rhashtable_params params)
|
|
|
|
{
|
|
|
|
struct bucket_table *tbl;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
tbl = rht_dereference_rcu(ht->tbl, ht);
|
|
|
|
|
|
|
|
/* Because we have already taken (and released) the bucket
|
|
|
|
* lock in old_tbl, if we find that future_tbl is not yet
|
|
|
|
* visible then that guarantees the entry to still be in
|
|
|
|
* the old tbl if it exists.
|
|
|
|
*/
|
|
|
|
while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
|
|
|
|
obj_new, params)) &&
|
|
|
|
(tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
|
|
|
|
;
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2016-09-19 19:00:09 +08:00
|
|
|
/**
|
|
|
|
* rhltable_walk_enter - Initialise an iterator
|
|
|
|
* @hlt: Table to walk over
|
|
|
|
* @iter: Hash table Iterator
|
|
|
|
*
|
|
|
|
* This function prepares a hash table walk.
|
|
|
|
*
|
|
|
|
* Note that if you restart a walk after rhashtable_walk_stop you
|
|
|
|
* may see the same object twice. Also, you may miss objects if
|
|
|
|
* there are removals in between rhashtable_walk_stop and the next
|
|
|
|
* call to rhashtable_walk_start.
|
|
|
|
*
|
|
|
|
* For a completely stable walk you should construct your own data
|
|
|
|
* structure outside the hash table.
|
|
|
|
*
|
2018-04-24 08:29:13 +10:00
|
|
|
* This function may be called from any process context, including
|
|
|
|
* non-preemptable context, but cannot be called from softirq or
|
|
|
|
* hardirq context.
|
2016-09-19 19:00:09 +08:00
|
|
|
*
|
|
|
|
* You must call rhashtable_walk_exit after this function returns.
|
|
|
|
*/
|
|
|
|
static inline void rhltable_walk_enter(struct rhltable *hlt,
|
|
|
|
struct rhashtable_iter *iter)
|
|
|
|
{
|
|
|
|
return rhashtable_walk_enter(&hlt->ht, iter);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* rhltable_free_and_destroy - free elements and destroy hash list table
|
|
|
|
* @hlt: the hash list table to destroy
|
|
|
|
* @free_fn: callback to release resources of element
|
|
|
|
* @arg: pointer passed to free_fn
|
|
|
|
*
|
|
|
|
* See documentation for rhashtable_free_and_destroy.
|
|
|
|
*/
|
|
|
|
static inline void rhltable_free_and_destroy(struct rhltable *hlt,
|
|
|
|
void (*free_fn)(void *ptr,
|
|
|
|
void *arg),
|
|
|
|
void *arg)
|
|
|
|
{
|
|
|
|
return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * rhltable_destroy - destroy hash list table
 * @hlt:	the hash list table to destroy
 *
 * Calls rhltable_free_and_destroy() with a NULL free_fn and a NULL arg.
 */
static inline void rhltable_destroy(struct rhltable *hlt)
{
	/* Plain call: a void function must not "return" an expression. */
	rhltable_free_and_destroy(hlt, NULL, NULL);
}
|
|
|
|
|
2014-08-02 11:47:44 +02:00
|
|
|
#endif /* _LINUX_RHASHTABLE_H */
|