ceph: introduce a new inode flag indicating if cached dentries are ordered

After creating/deleting/renaming file, offsets of sibling dentries may
change. So we can not use cached dentries to satisfy readdir. But we can
still use the cached dentries to conclude -ENOENT for lookup.

This patch introduces a new inode flag indicating if child dentries are
ordered. The flag is set at the same time marking a directory complete.
After creating/deleting/renaming file, we clear the flag on directory
inode. This prevents ceph_readdir() from using cached dentries to satisfy
readdir syscall.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
This commit is contained in:
Yan, Zheng 2014-10-21 18:09:56 -07:00 committed by Ilya Dryomov
parent 4965fc38c4
commit 70db4f3629
3 changed files with 55 additions and 19 deletions

View File

@ -183,7 +183,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
spin_unlock(&parent->d_lock);
/* make sure a dentry wasn't dropped while we didn't have parent lock */
if (!ceph_dir_is_complete(dir)) {
if (!ceph_dir_is_complete_ordered(dir)) {
dout(" lost dir complete on %p; falling back to mds\n", dir);
dput(dentry);
err = -EAGAIN;
@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* always start with . and .. */
if (ctx->pos == 0) {
/* note dir version at start of readdir so we can tell
* if any dentries get dropped */
fi->dir_release_count = atomic_read(&ci->i_release_count);
dout("readdir off 0 -> '.'\n");
if (!dir_emit(ctx, ".", 1,
ceph_translate_ino(inode->i_sb, inode->i_ino),
@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if ((ctx->pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete(ci) &&
__ceph_dir_is_complete_ordered(ci) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
u32 shared_gen = ci->i_shared_gen;
spin_unlock(&ci->i_ceph_lock);
@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* proceed with a normal readdir */
if (ctx->pos == 2) {
/* note dir version at start of readdir so we can tell
* if any dentries get dropped */
fi->dir_release_count = atomic_read(&ci->i_release_count);
fi->dir_ordered_count = ci->i_ordered_count;
}
more:
/* do we have the correct frag content buffered? */
if (fi->frag != frag || fi->last_readdir == NULL) {
@ -446,8 +449,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
*/
spin_lock(&ci->i_ceph_lock);
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
__ceph_dir_set_complete(ci, fi->dir_release_count);
if (ci->i_ordered_count == fi->dir_ordered_count)
dout(" marking %p complete and ordered\n", inode);
else
dout(" marking %p complete\n", inode);
__ceph_dir_set_complete(ci, fi->dir_release_count,
fi->dir_ordered_count);
}
spin_unlock(&ci->i_ceph_lock);

View File

@ -389,6 +389,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_version = 0;
ci->i_time_warp_seq = 0;
ci->i_ceph_flags = 0;
ci->i_ordered_count = 0;
atomic_set(&ci->i_release_count, 1);
atomic_set(&ci->i_complete_count, 0);
ci->i_symlink = NULL;
@ -845,7 +846,8 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
ci->i_ordered_count);
}
/* were we issued a capability? */
@ -1206,8 +1208,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
ceph_invalidate_dentry_lease(dn);
/* d_move screws up sibling dentries' offsets */
ceph_dir_clear_complete(dir);
ceph_dir_clear_complete(olddir);
ceph_dir_clear_ordered(dir);
ceph_dir_clear_ordered(olddir);
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
@ -1219,6 +1221,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
if (!rinfo->head->is_target) {
dout("fill_trace null dentry\n");
if (dn->d_inode) {
ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
} else {
@ -1235,7 +1238,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
/* attach proper inode */
if (!dn->d_inode) {
ceph_dir_clear_complete(dir);
ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, &have_lease);
if (IS_ERR(dn)) {
@ -1265,7 +1268,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
ceph_dir_clear_complete(dir);
ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {

View File

@ -256,6 +256,7 @@ struct ceph_inode_info {
u32 i_time_warp_seq;
unsigned i_ceph_flags;
int i_ordered_count;
atomic_t i_release_count;
atomic_t i_complete_count;
@ -434,14 +435,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Ceph inode.
*/
#define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */
#define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
int release_count)
int release_count, int ordered_count)
{
atomic_set(&ci->i_complete_count, release_count);
if (ci->i_ordered_count == ordered_count)
ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
else
ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
}
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@ -455,16 +461,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
atomic_read(&ci->i_release_count);
}
static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
{
return __ceph_dir_is_complete(ci) &&
(ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
}
static inline void ceph_dir_clear_complete(struct inode *inode)
{
__ceph_dir_clear_complete(ceph_inode(inode));
}
static inline bool ceph_dir_is_complete(struct inode *inode)
static inline void ceph_dir_clear_ordered(struct inode *inode)
{
return __ceph_dir_is_complete(ceph_inode(inode));
struct ceph_inode_info *ci = ceph_inode(inode);
spin_lock(&ci->i_ceph_lock);
ci->i_ordered_count++;
ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
spin_unlock(&ci->i_ceph_lock);
}
static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
bool ret;
spin_lock(&ci->i_ceph_lock);
ret = __ceph_dir_is_complete_ordered(ci);
spin_unlock(&ci->i_ceph_lock);
return ret;
}
/* find a specific frag @f */
extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@ -580,6 +605,7 @@ struct ceph_file_info {
char *last_name; /* last entry in previous chunk */
struct dentry *dentry; /* next dentry (for dcache readdir) */
int dir_release_count;
int dir_ordered_count;
/* used for -o dirstat read() on directory thing */
char *dir_info;