mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
blkcg: use radix tree to index blkgs from blkcg
blkg lookup is currently performed by traversing linked list anchored at blkcg->blkg_list. This is very unscalable and with blk-throttle enabled and enough request queues on the system, this can get very ugly quickly (blk-throttle performs look up on every bio submission).

This patch makes blkcg use radix tree to index blkgs combined with simple last-looked-up hint. This is mostly identical to how icqs are indexed from ioc.

Note that because __blkg_lookup() may be invoked without holding queue lock, hint is only updated from __blkg_lookup_create(). Due to cfq's cfqq caching, this makes hint updates overly lazy. This will be improved with scheduled blkcg aware request allocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent 496fb7806d
commit a637120e49
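The commit message above describes a lookup path that first checks a last-looked-up hint and only falls back to the indexed structure on a miss, with the hint refreshed only from the locked creation path. The following is a minimal userspace C sketch of that pattern, not the kernel code: a fixed-size array keyed by queue id stands in for the radix tree, a plain pointer stands in for the RCU-protected blkg_hint, and every demo_* name is invented for this illustration.

/*
 * Sketch only: hint-first lookup with an indexed fallback, modelled on the
 * scheme described in the commit message.  Not kernel API.
 */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_MAX_QUEUES 64

struct demo_blkg {
        int q_id;                       /* id of the request queue this group belongs to */
};

struct demo_blkcg {
        struct demo_blkg *hint;                         /* last successfully looked-up group */
        struct demo_blkg *tree[DEMO_MAX_QUEUES];        /* stand-in for blkg_tree */
};

static struct demo_blkg *demo_lookup(struct demo_blkcg *blkcg, int q_id)
{
        struct demo_blkg *blkg = blkcg->hint;

        /* fast path: the hint from the last successful lookup/create */
        if (blkg && blkg->q_id == q_id)
                return blkg;

        /* slow path: fall back to the index keyed by queue id */
        if (q_id < 0 || q_id >= DEMO_MAX_QUEUES)
                return NULL;
        return blkcg->tree[q_id];
}

static struct demo_blkg *demo_lookup_create(struct demo_blkcg *blkcg, int q_id)
{
        struct demo_blkg *blkg;

        if (q_id < 0 || q_id >= DEMO_MAX_QUEUES)
                return NULL;

        blkg = demo_lookup(blkcg, q_id);
        if (blkg) {
                blkcg->hint = blkg;     /* refresh the hint on a hit */
                return blkg;
        }

        blkg = calloc(1, sizeof(*blkg));
        if (!blkg)
                return NULL;
        blkg->q_id = q_id;
        blkcg->tree[q_id] = blkg;       /* insert into the index ... */
        blkcg->hint = blkg;             /* ... and point the hint at it */
        return blkg;
}

int main(void)
{
        struct demo_blkcg blkcg = { 0 };

        demo_lookup_create(&blkcg, 3);
        demo_lookup_create(&blkcg, 7);  /* hint now points at queue 7's group */

        printf("q=7: %p (hint hit)\n", (void *)demo_lookup(&blkcg, 7));
        printf("q=3: %p (hint miss, found via index)\n", (void *)demo_lookup(&blkcg, 3));
        printf("q=9: %p (not present)\n", (void *)demo_lookup(&blkcg, 9));
        return 0;
}

The real code in the diff below differs in the details that matter: the index is a radix tree keyed by q->id, the hint is read under RCU, and, as the in-code comment explains, plain __blkg_lookup() cannot safely update the hint because without queue_lock it cannot tell whether the entry it found is already being torn down, so only __blkg_lookup_create() refreshes it.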
block/blk-cgroup.c

@@ -142,11 +142,21 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
                                       struct request_queue *q)
 {
        struct blkcg_gq *blkg;
-       struct hlist_node *n;
 
-       hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
-               if (blkg->q == q)
-                       return blkg;
+       blkg = rcu_dereference(blkcg->blkg_hint);
+       if (blkg && blkg->q == q)
+               return blkg;
+
+       /*
+        * Hint didn't match.  Look up from the radix tree.  Note that we
+        * may not be holding queue_lock and thus are not sure whether
+        * @blkg from blkg_tree has already been removed or not, so we
+        * can't update hint to the lookup result.  Leave it to the caller.
+        */
+       blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
+       if (blkg && blkg->q == q)
+               return blkg;
+
        return NULL;
 }
@@ -179,9 +189,12 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
        WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(q->queue_lock);
 
+       /* lookup and update hint on success, see __blkg_lookup() for details */
        blkg = __blkg_lookup(blkcg, q);
-       if (blkg)
+       if (blkg) {
+               rcu_assign_pointer(blkcg->blkg_hint, blkg);
                return blkg;
+       }
 
        /* blkg holds a reference to blkcg */
        if (!css_tryget(&blkcg->css))
@@ -194,12 +207,24 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
                goto err_put;
 
        /* insert */
-       spin_lock(&blkcg->lock);
-       hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-       list_add(&blkg->q_node, &q->blkg_list);
-       spin_unlock(&blkcg->lock);
-       return blkg;
+       ret = radix_tree_preload(GFP_ATOMIC);
+       if (ret)
+               goto err_free;
 
+       spin_lock(&blkcg->lock);
+       ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
+       if (likely(!ret)) {
+               hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
+               list_add(&blkg->q_node, &q->blkg_list);
+       }
+       spin_unlock(&blkcg->lock);
+
+       radix_tree_preload_end();
+
+       if (!ret)
+               return blkg;
+err_free:
+       blkg_free(blkg);
 err_put:
        css_put(&blkcg->css);
        return ERR_PTR(ret);
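The insertion path in this hunk follows a preallocate-then-commit shape: radix_tree_preload() sets aside the tree nodes the coming insert might need before blkcg->lock is taken, radix_tree_insert() then runs under the spinlock without having to allocate, and radix_tree_preload_end() closes the preload section once the lock is dropped. Below is a rough userspace analogue of that ordering; malloc() and a pthread mutex stand in for the preload pool and the spinlock, and the demo_* names are invented for the sketch, not kernel API.

/*
 * Sketch only: allocate everything that might be needed before taking the
 * lock, so the critical section itself never has to allocate.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_SLOTS 16

struct demo_node {
        int key;
        int value;
};

struct demo_table {
        pthread_mutex_t lock;
        struct demo_node *slots[DEMO_SLOTS];
};

static int demo_table_insert(struct demo_table *t, int key, int value)
{
        struct demo_node *node;
        int ret = 0;

        if (key < 0 || key >= DEMO_SLOTS)
                return -EINVAL;

        /* "preload": do the allocation before the lock is taken */
        node = malloc(sizeof(*node));
        if (!node)
                return -ENOMEM;
        node->key = key;
        node->value = value;

        /* critical section: pointer updates only, no allocation inside */
        pthread_mutex_lock(&t->lock);
        if (t->slots[key])
                ret = -EEXIST;
        else
                t->slots[key] = node;
        pthread_mutex_unlock(&t->lock);

        if (ret)
                free(node);     /* mirrors the err_free: path in the hunk above */
        return ret;
}

int main(void)
{
        struct demo_table t = { .lock = PTHREAD_MUTEX_INITIALIZER };

        printf("insert key 3: %d\n", demo_table_insert(&t, 3, 100));
        printf("insert key 3 again: %d\n", demo_table_insert(&t, 3, 200));
        return 0;
}

The kernel version has the extra constraint that blkcg->lock is a spinlock, so nothing done while holding it may sleep in the allocator; the radix_tree_preload()/radix_tree_preload_end() pair brackets exactly that window. The insert can now fail, which is why the new code gains the err_free:/err_put: unwind path, whereas the old list-only insertion could not fail at all.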
@@ -229,9 +254,19 @@ static void blkg_destroy(struct blkcg_gq *blkg)
        /* Something wrong if we are trying to remove same group twice */
        WARN_ON_ONCE(list_empty(&blkg->q_node));
        WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
+
+       radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
        list_del_init(&blkg->q_node);
        hlist_del_init_rcu(&blkg->blkcg_node);
 
+       /*
+        * Both setting lookup hint to and clearing it from @blkg are done
+        * under queue_lock.  If it's not pointing to @blkg now, it never
+        * will.  Hint assignment itself can race safely.
+        */
+       if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
+               rcu_assign_pointer(blkcg->blkg_hint, NULL);
+
        /*
         * Put the reference taken at the time of creation so that when all
         * queues are gone, group can be destroyed.
@@ -593,6 +628,7 @@ static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup)
        blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
 done:
        spin_lock_init(&blkcg->lock);
+       INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
        INIT_HLIST_HEAD(&blkcg->blkg_list);
 
        return &blkcg->css;
block/blk-cgroup.h

@@ -16,6 +16,7 @@
 #include <linux/cgroup.h>
 #include <linux/u64_stats_sync.h>
 #include <linux/seq_file.h>
+#include <linux/radix-tree.h>
 
 /* Max limits for throttle policy */
 #define THROTL_IOPS_MAX         UINT_MAX
@@ -37,9 +38,14 @@ enum blkg_rwstat_type {
        BLKG_RWSTAT_TOTAL       = BLKG_RWSTAT_NR,
 };
 
+struct blkcg_gq;
+
 struct blkcg {
        struct cgroup_subsys_state      css;
        spinlock_t                      lock;
+
+       struct radix_tree_root          blkg_tree;
+       struct blkcg_gq                 *blkg_hint;
        struct hlist_head               blkg_list;
 
        /* for policies to test whether associated blkcg has changed */