xfs: use per-filesystem radix trees for dquot lookup

Replace the global hash tables for looking up in-memory dquot structures
with per-filesystem radix trees to allow scaling to a large number of
in-memory dquot structures.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
Christoph Hellwig 2012-03-13 08:52:35 +00:00 committed by Ben Myers
parent f8739c3ce2
commit 9f920f1164
6 changed files with 68 additions and 265 deletions

View File

@ -43,7 +43,7 @@
* Lock order:
*
* ip->i_lock
* qh->qh_lock
* qi->qi_tree_lock
* qi->qi_dqlist_lock
* dquot->q_qlock (xfs_dqlock() and friends)
* dquot->q_flush (xfs_dqflock() and friends)
@ -601,60 +601,6 @@ xfs_qm_dqread(
return error;
}
/*
* Lookup a dquot in the incore dquot hashtable. We keep two separate
* hashtables for user and group dquots; and, these are global tables
* inside the XQM, not per-filesystem tables.
* The hash chain must be locked by caller, and it is left locked
* on return. Returning dquot is locked.
*/
STATIC int
xfs_qm_dqlookup(
xfs_mount_t *mp,
xfs_dqid_t id,
xfs_dqhash_t *qh,
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;
ASSERT(mutex_is_locked(&qh->qh_lock));
/*
* Traverse the hashchain looking for a match
*/
list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
/*
* We already have the hashlock. We don't need the
* dqlock to look at the id field of the dquot, since the
* id can't be modified without the hashlock anyway.
*/
if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
continue;
trace_xfs_dqlookup_found(dqp);
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
*O_dqpp = NULL;
xfs_dqunlock(dqp);
return -1;
}
dqp->q_nrefs++;
/*
* move the dquot to the front of the hashchain
*/
list_move(&dqp->q_hashlist, &qh->qh_list);
trace_xfs_dqlookup_done(dqp);
*O_dqpp = dqp;
return 0;
}
*O_dqpp = NULL;
return 1;
}
/*
* Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
* a locked dquot, doing an allocation (if requested) as needed.
@ -672,10 +618,10 @@ xfs_qm_dqget(
uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
{
xfs_dquot_t *dqp, *dqp1;
xfs_dqhash_t *h;
uint version;
int error;
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
struct xfs_dquot *dqp;
int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
@ -683,7 +629,6 @@ xfs_qm_dqget(
(! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
return (ESRCH);
}
h = XFS_DQ_HASH(mp, id, type);
#ifdef DEBUG
if (xfs_do_dqerror) {
@ -704,34 +649,28 @@ xfs_qm_dqget(
#endif
restart:
mutex_lock(&h->qh_lock);
mutex_lock(&qi->qi_tree_lock);
dqp = radix_tree_lookup(tree, id);
if (dqp) {
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
xfs_dqunlock(dqp);
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_freeing(dqp);
delay(1);
goto restart;
}
/*
* Look in the cache (hashtable).
* The chain is kept locked during lookup.
*/
switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
case -1:
XFS_STATS_INC(xs_qm_dquot_dups);
mutex_unlock(&h->qh_lock);
delay(1);
goto restart;
case 0:
dqp->q_nrefs++;
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_hit(dqp);
XFS_STATS_INC(xs_qm_dqcachehits);
/*
* The dquot was found, moved to the front of the chain,
* taken off the freelist if it was on it, and locked
* at this point. Just unlock the hashchain and return.
*/
ASSERT(*O_dqpp);
ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
mutex_unlock(&h->qh_lock);
trace_xfs_dqget_hit(*O_dqpp);
return 0; /* success */
default:
XFS_STATS_INC(xs_qm_dqcachemisses);
break;
*O_dqpp = dqp;
return 0;
}
mutex_unlock(&qi->qi_tree_lock);
XFS_STATS_INC(xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
@ -742,12 +681,6 @@ xfs_qm_dqget(
*/
if (ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* Save the hashchain version stamp, and unlock the chain, so that
* we don't keep the lock across a disk read
*/
version = h->qh_version;
mutex_unlock(&h->qh_lock);
error = xfs_qm_dqread(mp, id, type, flags, &dqp);
@ -757,15 +690,14 @@ xfs_qm_dqget(
if (error)
return error;
/*
* Dquot lock comes after hashlock in the lock ordering
*/
if (ip) {
/*
* A dquot could be attached to this inode by now, since
* we had dropped the ilock.
*/
if (xfs_this_quota_on(mp, type)) {
struct xfs_dquot *dqp1;
dqp1 = xfs_inode_dquot(ip, type);
if (dqp1) {
xfs_qm_dqdestroy(dqp);
@ -780,51 +712,27 @@ xfs_qm_dqget(
}
}
/*
* Hashlock comes after ilock in lock order
*/
mutex_lock(&h->qh_lock);
if (version != h->qh_version) {
xfs_dquot_t *tmpdqp;
/*
* Now, see if somebody else put the dquot in the
* hashtable before us. This can happen because we didn't
* keep the hashchain lock. We don't have to worry about
* lock order between the two dquots here since dqp isn't
* on any findable lists yet.
*/
switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
case 0:
case -1:
/*
* Duplicate found, either in cache or on its way out.
* Just throw away the new dquot and start over.
*/
if (tmpdqp)
xfs_qm_dqput(tmpdqp);
mutex_unlock(&h->qh_lock);
xfs_qm_dqdestroy(dqp);
XFS_STATS_INC(xs_qm_dquot_dups);
goto restart;
default:
break;
}
}
mutex_lock(&qi->qi_tree_lock);
error = -radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
WARN_ON(error != EEXIST);
/*
* Put the dquot at the beginning of the hash-chain and mp's list
* LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
*/
ASSERT(mutex_is_locked(&h->qh_lock));
dqp->q_hash = h;
list_add(&dqp->q_hashlist, &h->qh_list);
h->qh_version++;
/*
* Duplicate found. Just throw away the new dquot and start
* over.
*/
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_dup(dqp);
xfs_qm_dqdestroy(dqp);
XFS_STATS_INC(xs_qm_dquot_dups);
goto restart;
}
/*
* Attach this dquot to this filesystem's list of all dquots,
* kept inside the mount structure in m_quotainfo field
*/
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
mutex_lock(&qi->qi_dqlist_lock);
/*
* We return a locked dquot to the caller, with a reference taken
@ -832,10 +740,11 @@ xfs_qm_dqget(
xfs_dqlock(dqp);
dqp->q_nrefs = 1;
list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
mp->m_quotainfo->qi_dquots++;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
mutex_unlock(&h->qh_lock);
list_add(&dqp->q_mplist, &qi->qi_dqlist);
qi->qi_dquots++;
mutex_unlock(&qi->qi_dqlist_lock);
mutex_unlock(&qi->qi_tree_lock);
dqret:
ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
trace_xfs_dqget_miss(dqp);
@ -1117,7 +1026,6 @@ xfs_qm_dqpurge(
struct xfs_dquot *dqp)
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_dqhash *qh = dqp->q_hash;
struct xfs_quotainfo *qi = mp->m_quotainfo;
xfs_dqlock(dqp);
@ -1164,10 +1072,10 @@ xfs_qm_dqpurge(
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
mutex_lock(&qh->qh_lock);
list_del_init(&dqp->q_hashlist);
qh->qh_version++;
mutex_unlock(&qh->qh_lock);
mutex_lock(&qi->qi_tree_lock);
radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
be32_to_cpu(dqp->q_core.d_id));
mutex_unlock(&qi->qi_tree_lock);
mutex_lock(&qi->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);

View File

@ -29,16 +29,6 @@
* when quotas are off.
*/
/*
* The hash chain headers (hash buckets)
*/
typedef struct xfs_dqhash {
struct list_head qh_list;
struct mutex qh_lock;
uint qh_version; /* ever increasing version */
uint qh_nelems; /* number of dquots on the list */
} xfs_dqhash_t;
struct xfs_mount;
struct xfs_trans;
@ -49,8 +39,6 @@ typedef struct xfs_dquot {
uint dq_flags; /* various flags (XFS_DQ_*) */
struct list_head q_lru; /* global free list of dquots */
struct list_head q_mplist; /* mount's list of dquots */
struct list_head q_hashlist; /* gloabl hash list of dquots */
xfs_dqhash_t *q_hash; /* the hashchain header */
struct xfs_mount*q_mount; /* filesystem this relates to */
struct xfs_trans*q_transp; /* trans this belongs to currently */
uint q_nrefs; /* # active refs from inodes */

View File

@ -54,9 +54,6 @@ struct xfs_qm *xfs_Gqm;
kmem_zone_t *qm_dqzone;
kmem_zone_t *qm_dqtrxzone;
STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
@ -68,37 +65,9 @@ STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
STATIC struct xfs_qm *
xfs_Gqm_init(void)
{
xfs_dqhash_t *udqhash, *gdqhash;
xfs_qm_t *xqm;
size_t hsize;
uint i;
/*
* Initialize the dquot hash tables.
*/
udqhash = kmem_zalloc_greedy(&hsize,
XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
if (!udqhash)
goto out;
gdqhash = kmem_zalloc_large(hsize);
if (!gdqhash)
goto out_free_udqhash;
hsize /= sizeof(xfs_dqhash_t);
xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
xqm->qm_dqhashmask = hsize - 1;
xqm->qm_usr_dqhtable = udqhash;
xqm->qm_grp_dqhtable = gdqhash;
ASSERT(xqm->qm_usr_dqhtable != NULL);
ASSERT(xqm->qm_grp_dqhtable != NULL);
for (i = 0; i < hsize; i++) {
xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
}
/*
* dquot zone. we register our own low-memory callback.
@ -122,11 +91,6 @@ xfs_Gqm_init(void)
xqm->qm_nrefs = 0;
return xqm;
out_free_udqhash:
kmem_free_large(udqhash);
out:
return NULL;
}
/*
@ -136,22 +100,9 @@ STATIC void
xfs_qm_destroy(
struct xfs_qm *xqm)
{
int hsize, i;
ASSERT(xqm != NULL);
ASSERT(xqm->qm_nrefs == 0);
hsize = xqm->qm_dqhashmask + 1;
for (i = 0; i < hsize; i++) {
xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
}
kmem_free_large(xqm->qm_usr_dqhtable);
kmem_free_large(xqm->qm_grp_dqhtable);
xqm->qm_usr_dqhtable = NULL;
xqm->qm_grp_dqhtable = NULL;
xqm->qm_dqhashmask = 0;
kmem_free(xqm);
}
@ -761,14 +712,6 @@ xfs_qm_dqdetach(
}
}
/*
* The hash chains and the mplist use the same xfs_dqhash structure as
* their list head, but we can take the mplist qh_lock and one of the
* hash qh_locks at the same time without any problem as they aren't
* related.
*/
static struct lock_class_key xfs_quota_mplist_class;
/*
* This initializes all the quota information that's kept in the
* mount structure
@ -802,9 +745,12 @@ xfs_qm_init_quotainfo(
return error;
}
INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
mutex_init(&qinf->qi_tree_lock);
INIT_LIST_HEAD(&qinf->qi_dqlist);
mutex_init(&qinf->qi_dqlist_lock);
lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
INIT_LIST_HEAD(&qinf->qi_lru_list);
qinf->qi_lru_count = 0;
@ -924,30 +870,6 @@ xfs_qm_destroy_quotainfo(
mp->m_quotainfo = NULL;
}
/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
/* ARGSUSED */
STATIC void
xfs_qm_list_init(
xfs_dqlist_t *list,
char *str,
int n)
{
mutex_init(&list->qh_lock);
INIT_LIST_HEAD(&list->qh_list);
list->qh_version = 0;
list->qh_nelems = 0;
}
STATIC void
xfs_qm_list_destroy(
xfs_dqlist_t *list)
{
mutex_destroy(&(list->qh_lock));
}
/*
* Create an inode and return with a reference already taken, but unlocked
* This is how we create quota inodes
@ -1592,10 +1514,10 @@ xfs_qm_dqfree_one(
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
mutex_lock(&dqp->q_hash->qh_lock);
list_del_init(&dqp->q_hashlist);
dqp->q_hash->qh_version++;
mutex_unlock(&dqp->q_hash->qh_lock);
mutex_lock(&qi->qi_tree_lock);
radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
be32_to_cpu(dqp->q_core.d_id));
mutex_unlock(&qi->qi_tree_lock);
mutex_lock(&qi->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
@ -1634,7 +1556,6 @@ xfs_qm_dqreclaim_one(
return;
}
ASSERT(dqp->q_hash);
ASSERT(!list_empty(&dqp->q_mplist));
/*

View File

@ -30,12 +30,6 @@ extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone;
/*
* Dquot hashtable constants/threshold values.
*/
#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
/*
* This defines the unit of allocation of dquots.
* Currently, it is just one file system block, and a 4K blk contains 30
@ -47,15 +41,10 @@ extern kmem_zone_t *qm_dqtrxzone;
*/
#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
typedef xfs_dqhash_t xfs_dqlist_t;
/*
* Quota Manager (global) structure. Lives only in core.
*/
typedef struct xfs_qm {
xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
uint qm_nrefs; /* file systems with quota on */
kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
@ -66,6 +55,9 @@ typedef struct xfs_qm {
* The mount structure keeps a pointer to this.
*/
typedef struct xfs_quotainfo {
struct radix_tree_root qi_uquota_tree;
struct radix_tree_root qi_gquota_tree;
struct mutex qi_tree_lock;
xfs_inode_t *qi_uquotaip; /* user quota inode */
xfs_inode_t *qi_gquotaip; /* group quota inode */
struct list_head qi_lru_list;
@ -94,6 +86,11 @@ typedef struct xfs_quotainfo {
struct shrinker qi_shrinker;
} xfs_quotainfo_t;
#define XFS_DQUOT_TREE(qi, type) \
((type & XFS_DQ_USER) ? \
&((qi)->qi_uquota_tree) : \
&((qi)->qi_gquota_tree))
extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,

View File

@ -24,17 +24,6 @@
*/
#define XFS_DQITER_MAP_SIZE 10
/*
* Hash into a bucket in the dquot hash table, based on <mp, id>.
*/
#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
(__psunsigned_t)(id)) & \
(xfs_Gqm->qm_dqhashmask - 1))
#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
(xfs_Gqm->qm_usr_dqhtable + \
XFS_DQ_HASHVAL(mp, id)) : \
(xfs_Gqm->qm_grp_dqhtable + \
XFS_DQ_HASHVAL(mp, id)))
#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
!dqp->q_core.d_blk_hardlimit && \
!dqp->q_core.d_blk_softlimit && \

View File

@ -741,10 +741,10 @@ DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
DEFINE_DQUOT_EVENT(xfs_dqget_freeing);
DEFINE_DQUOT_EVENT(xfs_dqget_dup);
DEFINE_DQUOT_EVENT(xfs_dqput);
DEFINE_DQUOT_EVENT(xfs_dqput_wait);
DEFINE_DQUOT_EVENT(xfs_dqput_free);