lib: radix-tree: add entry deletion support to __radix_tree_replace()

Page cache shadow entry handling will be a lot simpler when it can use a
single generic replacement function for pages, shadow entries, and
emptying slots.

Make __radix_tree_replace() properly account insertions and deletions in
node->count and garbage collect nodes as they become empty.  Then
re-implement radix_tree_delete() on top of it.

Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Johannes Weiner 2016-12-12 16:43:46 -08:00 committed by Linus Torvalds
parent 6d75f366b9
commit f4b109c6da

View File

@ -538,6 +538,107 @@ static int radix_tree_extend(struct radix_tree_root *root,
return maxshift + RADIX_TREE_MAP_SHIFT;
}
/**
* radix_tree_shrink - shrink radix tree to minimum height
* @root radix tree root
*/
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
struct radix_tree_node *node = root->rnode;
struct radix_tree_node *child;
if (!radix_tree_is_internal_node(node))
break;
node = entry_to_node(node);
/*
* The candidate node has more than one child, or its child
* is not at the leftmost slot, or the child is a multiorder
* entry, we cannot shrink.
*/
if (node->count != 1)
break;
child = node->slots[0];
if (!child)
break;
if (!radix_tree_is_internal_node(child) && node->shift)
break;
if (radix_tree_is_internal_node(child))
entry_to_node(child)->parent = NULL;
/*
* We don't need rcu_assign_pointer(), since we are simply
* moving the node from one part of the tree to another: if it
* was safe to dereference the old pointer to it
* (node->slots[0]), it will be safe to dereference the new
* one (root->rnode) as far as dependent read barriers go.
*/
root->rnode = child;
/*
* We have a dilemma here. The node's slot[0] must not be
* NULLed in case there are concurrent lookups expecting to
* find the item. However if this was a bottom-level node,
* then it may be subject to the slot pointer being visible
* to callers dereferencing it. If item corresponding to
* slot[0] is subsequently deleted, these callers would expect
* their slot to become empty sooner or later.
*
* For example, lockless pagecache will look up a slot, deref
* the page pointer, and if the page has 0 refcount it means it
* was concurrently deleted from pagecache so try the deref
* again. Fortunately there is already a requirement for logic
* to retry the entire slot lookup -- the indirect pointer
* problem (replacing direct root node with an indirect pointer
* also results in a stale slot). So tag the slot as indirect
* to force callers to retry.
*/
if (!radix_tree_is_internal_node(child))
node->slots[0] = RADIX_TREE_RETRY;
radix_tree_node_free(node);
shrunk = true;
}
return shrunk;
}
static bool delete_node(struct radix_tree_root *root,
struct radix_tree_node *node)
{
bool deleted = false;
do {
struct radix_tree_node *parent;
if (node->count) {
if (node == entry_to_node(root->rnode))
deleted |= radix_tree_shrink(root);
return deleted;
}
parent = node->parent;
if (parent) {
parent->slots[node->offset] = NULL;
parent->count--;
} else {
root_tag_clear_all(root);
root->rnode = NULL;
}
radix_tree_node_free(node);
deleted = true;
node = parent;
} while (node);
return deleted;
}
/**
* __radix_tree_create - create a slot in a radix tree
* @root: radix tree root
@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
bool warn_typeswitch)
{
void *old = rcu_dereference_raw(*slot);
int exceptional;
int count, exceptional;
WARN_ON_ONCE(radix_tree_is_internal_node(item));
WARN_ON_ONCE(!!item - !!old);
count = !!item - !!old;
exceptional = !!radix_tree_exceptional_entry(item) -
!!radix_tree_exceptional_entry(old);
WARN_ON_ONCE(warn_typeswitch && exceptional);
WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
if (node)
if (node) {
node->count += count;
node->exceptional += exceptional;
}
rcu_assign_pointer(*slot, item);
}
@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
void **slot, void *item)
{
/*
* This function supports replacing exceptional entries, but
* that needs accounting against the node unless the slot is
* root->rnode.
* This function supports replacing exceptional entries and
* deleting entries, but that needs accounting against the
* node unless the slot is root->rnode.
*/
replace_slot(root, node, slot, item,
!node && slot != (void **)&root->rnode);
delete_node(root, node);
}
/**
@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
*
* NOTE: This cannot be used to switch between non-entries (empty slots),
* regular entries, and exceptional entries, as that requires accounting
* inside the radix tree node. When switching from one type of entry to
* another, use __radix_tree_lookup() and __radix_tree_replace().
* inside the radix tree node. When switching from one type of entry or
* deleting, use __radix_tree_lookup() and __radix_tree_replace().
*/
void radix_tree_replace_slot(struct radix_tree_root *root,
void **slot, void *item)
@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
* radix_tree_shrink - shrink radix tree to minimum height
* @root radix tree root
*/
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
struct radix_tree_node *node = root->rnode;
struct radix_tree_node *child;
if (!radix_tree_is_internal_node(node))
break;
node = entry_to_node(node);
/*
* The candidate node has more than one child, or its child
* is not at the leftmost slot, or the child is a multiorder
* entry, we cannot shrink.
*/
if (node->count != 1)
break;
child = node->slots[0];
if (!child)
break;
if (!radix_tree_is_internal_node(child) && node->shift)
break;
if (radix_tree_is_internal_node(child))
entry_to_node(child)->parent = NULL;
/*
* We don't need rcu_assign_pointer(), since we are simply
* moving the node from one part of the tree to another: if it
* was safe to dereference the old pointer to it
* (node->slots[0]), it will be safe to dereference the new
* one (root->rnode) as far as dependent read barriers go.
*/
root->rnode = child;
/*
* We have a dilemma here. The node's slot[0] must not be
* NULLed in case there are concurrent lookups expecting to
* find the item. However if this was a bottom-level node,
* then it may be subject to the slot pointer being visible
* to callers dereferencing it. If item corresponding to
* slot[0] is subsequently deleted, these callers would expect
* their slot to become empty sooner or later.
*
* For example, lockless pagecache will look up a slot, deref
* the page pointer, and if the page has 0 refcount it means it
* was concurrently deleted from pagecache so try the deref
* again. Fortunately there is already a requirement for logic
* to retry the entire slot lookup -- the indirect pointer
* problem (replacing direct root node with an indirect pointer
* also results in a stale slot). So tag the slot as indirect
* to force callers to retry.
*/
if (!radix_tree_is_internal_node(child))
node->slots[0] = RADIX_TREE_RETRY;
radix_tree_node_free(node);
shrunk = true;
}
return shrunk;
}
/**
* __radix_tree_delete_node - try to free node after clearing a slot
* @root: radix tree root
@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
bool __radix_tree_delete_node(struct radix_tree_root *root,
struct radix_tree_node *node)
{
bool deleted = false;
do {
struct radix_tree_node *parent;
if (node->count) {
if (node == entry_to_node(root->rnode))
deleted |= radix_tree_shrink(root);
return deleted;
}
parent = node->parent;
if (parent) {
parent->slots[node->offset] = NULL;
parent->count--;
} else {
root_tag_clear_all(root);
root->rnode = NULL;
}
radix_tree_node_free(node);
deleted = true;
node = parent;
} while (node);
return deleted;
return delete_node(root, node);
}
static inline void delete_sibling_entries(struct radix_tree_node *node,
@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
node_tag_clear(root, node, tag, offset);
delete_sibling_entries(node, node_to_entry(slot), offset);
node->slots[offset] = NULL;
node->count--;
if (radix_tree_exceptional_entry(entry))
node->exceptional--;
__radix_tree_delete_node(root, node);
__radix_tree_replace(root, node, slot, NULL);
return entry;
}