xarray: Change definition of sibling entries

Instead of storing a pointer to the slot containing the canonical entry,
store the offset of the slot.  Produces slightly more efficient code
(~300 bytes) and simplifies the implementation.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
This commit is contained in:
Matthew Wilcox 2017-11-03 23:09:45 -04:00
parent 3159f943aa
commit 02c02bf12c
6 changed files with 121 additions and 50 deletions

View File

@ -59,10 +59,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
#define RADIX_TREE_MAX_TAGS 3 #define RADIX_TREE_MAX_TAGS 3
#ifndef RADIX_TREE_MAP_SHIFT #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT
#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
#endif
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)

View File

@ -22,6 +22,12 @@
* x1: Value entry or tagged pointer * x1: Value entry or tagged pointer
* *
* Attempting to store internal entries in the XArray is a bug. * Attempting to store internal entries in the XArray is a bug.
*
* Most internal entries are pointers to the next node in the tree.
* The following internal entries have a special meaning:
*
* 0-62: Sibling entries
* 256: Retry entry
*/ */
#define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1)
@ -111,6 +117,42 @@ static inline unsigned int xa_pointer_tag(void *entry)
return (unsigned long)entry & 3UL; return (unsigned long)entry & 3UL;
} }
/*
 * xa_mk_internal() - Build an internal entry from a value.
 * @v: Value to encode.
 *
 * The value is moved up by two bits and the low two bits are set to
 * binary 10, which is the pattern xa_is_internal() tests for.
 *
 * Context: Any context.
 * Return: The XArray internal entry that encodes @v.
 */
static inline void *xa_mk_internal(unsigned long v)
{
	unsigned long encoded = (v << 2) | 2;

	return (void *)encoded;
}
/*
 * xa_to_internal() - Recover the value held by an internal entry.
 * @entry: XArray entry.
 *
 * The low two bits of the entry are discarded by the shift; no check is
 * made here that @entry really is an internal entry.
 *
 * Context: Any context.
 * Return: The value which was stored in the internal entry.
 */
static inline unsigned long xa_to_internal(const void *entry)
{
	unsigned long raw = (unsigned long)entry;

	return raw >> 2;
}
/*
 * xa_is_internal() - Test whether an entry is an internal entry.
 * @entry: XArray entry.
 *
 * An entry is internal when its two low bits are binary 10.
 *
 * Context: Any context.
 * Return: %true if the entry is an internal entry.
 */
static inline bool xa_is_internal(const void *entry)
{
	unsigned long low_bits = (unsigned long)entry & 3;

	return low_bits == 2;
}
#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock)
#define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock)
#define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock)
@ -123,4 +165,54 @@ static inline unsigned int xa_pointer_tag(void *entry)
#define xa_unlock_irqrestore(xa, flags) \ #define xa_unlock_irqrestore(xa, flags) \
spin_unlock_irqrestore(&(xa)->xa_lock, flags) spin_unlock_irqrestore(&(xa)->xa_lock, flags)
/* Everything below here is the Advanced API. Proceed with caution. */
/*
* The xarray is constructed out of a set of 'chunks' of pointers. Choosing
* the best chunk size requires some tradeoffs. A power of two recommends
* itself so that we can walk the tree based purely on shifts and masks.
* Generally, the larger the better; as the number of slots per level of the
* tree increases, the less tall the tree needs to be. But that needs to be
* balanced against the memory consumption of each node. On a 64-bit system,
* xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we
* doubled the number of slots per node, we'd get only 3 nodes per 4kB page.
*/
#ifndef XA_CHUNK_SHIFT
/* Default only: an XA_CHUNK_SHIFT defined before this point is kept as-is. */
#define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
#endif
/* Number of slots in one chunk: 16 for a shift of 4, 64 for a shift of 6. */
#define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT)
/* Mask with the low XA_CHUNK_SHIFT bits set. */
#define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1)
/*
 * Private.
 *
 * An entry counts as a node when it is an internal entry whose raw value is
 * above 4096.  The sibling and retry entries encode to much smaller raw
 * values (at most 250 and exactly 1026 respectively), so they never pass
 * this test.
 */
static inline bool xa_is_node(const void *entry)
{
	if (!xa_is_internal(entry))
		return false;

	return (unsigned long)entry > 4096;
}
/*
 * Private.
 *
 * Encode a slot offset as a sibling entry, i.e. the internal entry whose
 * value is @offset.
 */
static inline void *xa_mk_sibling(unsigned int offset)
{
	void *sibling = xa_mk_internal(offset);

	return sibling;
}
/*
 * Private.
 *
 * Decode a sibling entry back into the slot offset it was built from.
 * The caller is expected to have checked the entry with xa_is_sibling().
 */
static inline unsigned long xa_to_sibling(const void *entry)
{
	unsigned long offset = xa_to_internal(entry);

	return offset;
}
/**
 * xa_is_sibling() - Is the entry a sibling entry?
 * @entry: Entry retrieved from the XArray
 *
 * A sibling entry is an internal entry whose raw value is below that of the
 * sibling entry for offset XA_CHUNK_SIZE - 1, so the offsets it can encode
 * are 0 to XA_CHUNK_SIZE - 2 (0-62 with the default chunk shift of 6).
 * Always %false when CONFIG_XARRAY_MULTI is not enabled.
 *
 * Return: %true if the entry is a sibling entry.
 */
static inline bool xa_is_sibling(const void *entry)
{
	/*
	 * Compare as integers: both operands are encoded values rather than
	 * addresses inside one object, and the C standard does not define a
	 * relational comparison between such pointers.
	 */
	return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) &&
		(unsigned long)entry <
			(unsigned long)xa_mk_sibling(XA_CHUNK_SIZE - 1);
}
/* The retry entry: the internal entry that holds the value 256. */
#define XA_RETRY_ENTRY xa_mk_internal(256)
#endif /* _LINUX_XARRAY_H */ #endif /* _LINUX_XARRAY_H */

View File

@ -399,8 +399,15 @@ config INTERVAL_TREE
for more information. for more information.
config XARRAY_MULTI
bool
help
Support entries which occupy multiple consecutive indices in the
XArray.
config RADIX_TREE_MULTIORDER config RADIX_TREE_MULTIORDER
bool bool
select XARRAY_MULTI
config ASSOCIATIVE_ARRAY config ASSOCIATIVE_ARRAY
bool bool

View File

@ -38,6 +38,7 @@
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/xarray.h>
/* Number of nodes in fully populated tree of given height */ /* Number of nodes in fully populated tree of given height */
@ -98,24 +99,7 @@ static inline void *node_to_entry(void *ptr)
return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
} }
#define RADIX_TREE_RETRY node_to_entry(NULL) #define RADIX_TREE_RETRY XA_RETRY_ENTRY
#ifdef CONFIG_RADIX_TREE_MULTIORDER
/* Sibling slots point directly to another slot in the same node */
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
void __rcu **ptr = node;
return (parent->slots <= ptr) &&
(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
}
#else
static inline
bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
{
return false;
}
#endif
static inline unsigned long static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@ -129,16 +113,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
#ifdef CONFIG_RADIX_TREE_MULTIORDER if (xa_is_sibling(entry)) {
if (radix_tree_is_internal_node(entry)) { offset = xa_to_sibling(entry);
if (is_sibling_entry(parent, entry)) { entry = rcu_dereference_raw(parent->slots[offset]);
void __rcu **sibentry;
sibentry = (void __rcu **) entry_to_node(entry);
offset = get_slot_offset(parent, sibentry);
entry = rcu_dereference_raw(*sibentry);
}
} }
#endif
*nodep = (void *)entry; *nodep = (void *)entry;
return offset; return offset;
@ -300,10 +278,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
} else if (!radix_tree_is_internal_node(entry)) { } else if (!radix_tree_is_internal_node(entry)) {
pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
entry, i, first, last, node); entry, i, first, last, node);
} else if (is_sibling_entry(node, entry)) { } else if (xa_is_sibling(entry)) {
pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
entry, i, first, last, node, entry, i, first, last, node,
*(void **)entry_to_node(entry)); node->slots[xa_to_sibling(entry)]);
} else { } else {
dump_node(entry_to_node(entry), first); dump_node(entry_to_node(entry), first);
} }
@ -881,8 +859,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
for (;;) { for (;;) {
void *entry = rcu_dereference_raw(child->slots[offset]); void *entry = rcu_dereference_raw(child->slots[offset]);
if (radix_tree_is_internal_node(entry) && child->shift && if (xa_is_node(entry) && child->shift) {
!is_sibling_entry(child, entry)) {
child = entry_to_node(entry); child = entry_to_node(entry);
offset = 0; offset = 0;
continue; continue;
@ -904,7 +881,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
static inline int insert_entries(struct radix_tree_node *node, static inline int insert_entries(struct radix_tree_node *node,
void __rcu **slot, void *item, unsigned order, bool replace) void __rcu **slot, void *item, unsigned order, bool replace)
{ {
struct radix_tree_node *child; void *sibling;
unsigned i, n, tag, offset, tags = 0; unsigned i, n, tag, offset, tags = 0;
if (node) { if (node) {
@ -922,7 +899,7 @@ static inline int insert_entries(struct radix_tree_node *node,
offset = offset & ~(n - 1); offset = offset & ~(n - 1);
slot = &node->slots[offset]; slot = &node->slots[offset];
} }
child = node_to_entry(slot); sibling = xa_mk_sibling(offset);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (slot[i]) { if (slot[i]) {
@ -939,7 +916,7 @@ static inline int insert_entries(struct radix_tree_node *node,
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct radix_tree_node *old = rcu_dereference_raw(slot[i]); struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
if (i) { if (i) {
rcu_assign_pointer(slot[i], child); rcu_assign_pointer(slot[i], sibling);
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
if (tags & (1 << tag)) if (tags & (1 << tag))
tag_clear(node, tag, offset + i); tag_clear(node, tag, offset + i);
@ -949,9 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node,
if (tags & (1 << tag)) if (tags & (1 << tag))
tag_set(node, tag, offset); tag_set(node, tag, offset);
} }
if (radix_tree_is_internal_node(old) && if (xa_is_node(old))
!is_sibling_entry(node, old) &&
(old != RADIX_TREE_RETRY))
radix_tree_free_nodes(old); radix_tree_free_nodes(old);
if (xa_is_value(old)) if (xa_is_value(old))
node->exceptional--; node->exceptional--;
@ -1112,10 +1087,10 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
void __rcu **slot, int count, int exceptional) void __rcu **slot, int count, int exceptional)
{ {
#ifdef CONFIG_RADIX_TREE_MULTIORDER #ifdef CONFIG_RADIX_TREE_MULTIORDER
void *ptr = node_to_entry(slot); unsigned offset = get_slot_offset(node, slot);
unsigned offset = get_slot_offset(node, slot) + 1; void *ptr = xa_mk_sibling(offset);
while (offset < RADIX_TREE_MAP_SIZE) { while (++offset < RADIX_TREE_MAP_SIZE) {
if (rcu_dereference_raw(node->slots[offset]) != ptr) if (rcu_dereference_raw(node->slots[offset]) != ptr)
break; break;
if (count < 0) { if (count < 0) {
@ -1123,7 +1098,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
node->count--; node->count--;
} }
node->exceptional += exceptional; node->exceptional += exceptional;
offset++;
} }
#endif #endif
} }
@ -1319,8 +1293,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
tags |= 1 << tag; tags |= 1 << tag;
for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
if (!is_sibling_entry(parent, if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
rcu_dereference_raw(parent->slots[end])))
break; break;
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
if (tags & (1 << tag)) if (tags & (1 << tag))
@ -1618,7 +1591,7 @@ static void __rcu **skip_siblings(struct radix_tree_node **nodep,
{ {
while (iter->index < iter->next_index) { while (iter->index < iter->next_index) {
*nodep = rcu_dereference_raw(*slot); *nodep = rcu_dereference_raw(*slot);
if (*nodep && !is_sibling_entry(iter->node, *nodep)) if (*nodep && !xa_is_sibling(*nodep))
return slot; return slot;
slot++; slot++;
iter->index = __radix_tree_iter_add(iter, 1); iter->index = __radix_tree_iter_add(iter, 1);
@ -1769,7 +1742,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
while (++offset < RADIX_TREE_MAP_SIZE) { while (++offset < RADIX_TREE_MAP_SIZE) {
void *slot = rcu_dereference_raw( void *slot = rcu_dereference_raw(
node->slots[offset]); node->slots[offset]);
if (is_sibling_entry(node, slot)) if (xa_is_sibling(slot))
continue; continue;
if (slot) if (slot)
break; break;
@ -2283,6 +2256,7 @@ void __init radix_tree_init(void)
BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
radix_tree_node_cachep = kmem_cache_create("radix_tree_node", radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
sizeof(struct radix_tree_node), 0, sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,

View File

@ -46,6 +46,6 @@ idr.c: ../../../lib/idr.c
generated/map-shift.h: generated/map-shift.h:
@if ! grep -qws $(SHIFT) generated/map-shift.h; then \ @if ! grep -qws $(SHIFT) generated/map-shift.h; then \
echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
generated/map-shift.h; \ generated/map-shift.h; \
fi fi

View File

@ -1 +1,2 @@
#define CONFIG_RADIX_TREE_MULTIORDER 1 #define CONFIG_RADIX_TREE_MULTIORDER 1
#define CONFIG_XARRAY_MULTI 1