mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-07 22:03:14 +00:00
We have:
- a set of patches to address fsync stalls caused by depending on periodic rather than triggered MDS journal flushes in some cases (Xiubo Li) - a fix for mtime effectively not getting updated in case of competing writers (Jeff Layton) - a couple of fixes for inode reference leaks and various WARNs after "umount -f" (Xiubo Li) - a new ceph.auth_mds extended attribute (Jeff Layton) - a smattering of fixups and cleanups from Jeff, Xiubo and Colin. -----BEGIN PGP SIGNATURE----- iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmE46mYTHGlkcnlvbW92 QGdtYWlsLmNvbQAKCRBKf944AhHzi9UEB/sGT4eqMzkQLzJ2XjpKUvxXaJNVdPvS Jmg26KV5wc9Y9v6L7ww/eQjxbTOnda3G2/XG0xiE8dC1vq54Vux/FKiAT+H2/z/9 onShFK+SARoF4DilKnY0JNCwcGxQ3FjWAgPqPKqAyTAX2wjVxDKFHB0C+7yhhJay wyDrRaaHyFc4TwHeiEi8xU7dB55XsvxWGUgnHbcOLyUbbBKddt98FadNZ2t9b76y EVwAxgY0RbUUFxOJ9VVjiaNLUP4532iXUn+fehMjRGmDCmjaLNxCrsq6d0p//LJV nhVRG+Mv8IfTjqZwFbnWV8xbGwX0lY+g+hn0cdi7urUH3GDa97vmJF3u =z6dR -----END PGP SIGNATURE----- Merge tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client Pull ceph updates from Ilya Dryomov: - a set of patches to address fsync stalls caused by depending on periodic rather than triggered MDS journal flushes in some cases (Xiubo Li) - a fix for mtime effectively not getting updated in case of competing writers (Jeff Layton) - a couple of fixes for inode reference leaks and various WARNs after "umount -f" (Xiubo Li) - a new ceph.auth_mds extended attribute (Jeff Layton) - a smattering of fixups and cleanups from Jeff, Xiubo and Colin. * tag 'ceph-for-5.15-rc1' of git://github.com/ceph/ceph-client: ceph: fix dereference of null pointer cf ceph: drop the mdsc_get_session/put_session dout messages ceph: lockdep annotations for try_nonblocking_invalidate ceph: don't WARN if we're forcibly removing the session caps ceph: don't WARN if we're force umounting ceph: remove the capsnaps when removing caps ceph: request Fw caps before updating the mtime in ceph_write_iter ceph: reconnect to the export targets on new mdsmaps ceph: print more information when we can't find snaprealm ceph: add ceph_change_snap_realm() helper ceph: remove redundant initializations from mdsc and session ceph: cancel delayed work instead of flushing on mdsc teardown ceph: add a new vxattr to return auth mds for an inode ceph: remove some defunct forward declarations ceph: flush the mdlog before waiting on unsafe reqs ceph: flush mdlog before umounting ceph: make iterate_sessions a global symbol ceph: make ceph_create_session_msg a global symbol ceph: fix comment about short copies in ceph_write_end ceph: fix memory leak on decode error in ceph_handle_caps
This commit is contained in:
commit
8a05abd0c9
@ -1281,8 +1281,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
|
||||
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
|
||||
inode, page, (int)pos, (int)copied, (int)len);
|
||||
|
||||
/* zero the stale part of the page if we did a short copy */
|
||||
if (!PageUptodate(page)) {
|
||||
/* just return that nothing was copied on a short copy */
|
||||
if (copied < len) {
|
||||
copied = 0;
|
||||
goto out;
|
||||
|
@ -26,12 +26,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
|
||||
void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
|
||||
void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
|
||||
|
||||
int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
|
||||
int ceph_readpages_from_fscache(struct inode *inode,
|
||||
struct address_space *mapping,
|
||||
struct list_head *pages,
|
||||
unsigned *nr_pages);
|
||||
|
||||
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
|
||||
{
|
||||
ci->fscache = NULL;
|
||||
|
266
fs/ceph/caps.c
266
fs/ceph/caps.c
@ -703,29 +703,12 @@ void ceph_add_cap(struct inode *inode,
|
||||
*/
|
||||
struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
|
||||
realmino);
|
||||
if (realm) {
|
||||
struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
|
||||
if (oldrealm) {
|
||||
spin_lock(&oldrealm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
spin_unlock(&oldrealm->inodes_with_caps_lock);
|
||||
}
|
||||
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_add(&ci->i_snap_realm_item,
|
||||
&realm->inodes_with_caps);
|
||||
ci->i_snap_realm = realm;
|
||||
if (realm->ino == ci->i_vino.ino)
|
||||
realm->inode = inode;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
|
||||
if (oldrealm)
|
||||
ceph_put_snap_realm(mdsc, oldrealm);
|
||||
} else {
|
||||
pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
|
||||
realmino);
|
||||
WARN_ON(!realm);
|
||||
}
|
||||
if (realm)
|
||||
ceph_change_snap_realm(inode, realm);
|
||||
else
|
||||
WARN(1, "%s: couldn't find snap realm 0x%llx (ino 0x%llx oldrealm 0x%llx)\n",
|
||||
__func__, realmino, ci->i_vino.ino,
|
||||
ci->i_snap_realm ? ci->i_snap_realm->ino : 0);
|
||||
}
|
||||
|
||||
__check_cap_issue(ci, cap, issued);
|
||||
@ -1112,20 +1095,6 @@ int ceph_is_any_caps(struct inode *inode)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void drop_inode_snap_realm(struct ceph_inode_info *ci)
|
||||
{
|
||||
struct ceph_snap_realm *realm = ci->i_snap_realm;
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
ci->i_snap_realm_counter++;
|
||||
ci->i_snap_realm = NULL;
|
||||
if (realm->ino == ci->i_vino.ino)
|
||||
realm->inode = NULL;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
|
||||
realm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
|
||||
*
|
||||
@ -1145,17 +1114,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
|
||||
return;
|
||||
}
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
|
||||
|
||||
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
|
||||
|
||||
/* remove from inode's cap rbtree, and clear auth cap */
|
||||
rb_erase(&cap->ci_node, &ci->i_caps);
|
||||
if (ci->i_auth_cap == cap) {
|
||||
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
|
||||
!mdsc->fsc->blocklisted);
|
||||
if (ci->i_auth_cap == cap)
|
||||
ci->i_auth_cap = NULL;
|
||||
}
|
||||
|
||||
/* remove from session list */
|
||||
spin_lock(&session->s_cap_lock);
|
||||
@ -1201,12 +1169,34 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
|
||||
* keep i_snap_realm.
|
||||
*/
|
||||
if (ci->i_wr_ref == 0 && ci->i_snap_realm)
|
||||
drop_inode_snap_realm(ci);
|
||||
ceph_change_snap_realm(&ci->vfs_inode, NULL);
|
||||
|
||||
__cap_delay_cancel(mdsc, ci);
|
||||
}
|
||||
}
|
||||
|
||||
void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
|
||||
{
|
||||
struct ceph_inode_info *ci = cap->ci;
|
||||
struct ceph_fs_client *fsc;
|
||||
|
||||
/* 'ci' being NULL means the remove have already occurred */
|
||||
if (!ci) {
|
||||
dout("%s: cap inode is NULL\n", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
|
||||
WARN_ON_ONCE(ci->i_auth_cap == cap &&
|
||||
!list_empty(&ci->i_dirty_item) &&
|
||||
!fsc->blocklisted &&
|
||||
READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
|
||||
|
||||
__ceph_remove_cap(cap, queue_release);
|
||||
}
|
||||
|
||||
struct cap_msg_args {
|
||||
struct ceph_mds_session *session;
|
||||
u64 ino, cid, follows;
|
||||
@ -1335,7 +1325,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
|
||||
while (p) {
|
||||
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
|
||||
p = rb_next(p);
|
||||
__ceph_remove_cap(cap, true);
|
||||
ceph_remove_cap(cap, true);
|
||||
}
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
}
|
||||
@ -1746,6 +1736,9 @@ struct ceph_cap_flush *ceph_alloc_cap_flush(void)
|
||||
struct ceph_cap_flush *cf;
|
||||
|
||||
cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
|
||||
if (!cf)
|
||||
return NULL;
|
||||
|
||||
cf->is_capsnap = false;
|
||||
return cf;
|
||||
}
|
||||
@ -1856,6 +1849,8 @@ static u64 __mark_caps_flushing(struct inode *inode,
|
||||
* try to invalidate mapping pages without blocking.
|
||||
*/
|
||||
static int try_nonblocking_invalidate(struct inode *inode)
|
||||
__releases(ci->i_ceph_lock)
|
||||
__acquires(ci->i_ceph_lock)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
u32 invalidating_gen = ci->i_rdcache_gen;
|
||||
@ -2219,6 +2214,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
|
||||
*/
|
||||
static int unsafe_request_wait(struct inode *inode)
|
||||
{
|
||||
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
|
||||
int ret, err = 0;
|
||||
@ -2238,6 +2234,81 @@ static int unsafe_request_wait(struct inode *inode)
|
||||
}
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
|
||||
/*
|
||||
* Trigger to flush the journal logs in all the relevant MDSes
|
||||
* manually, or in the worst case we must wait at most 5 seconds
|
||||
* to wait the journal logs to be flushed by the MDSes periodically.
|
||||
*/
|
||||
if (req1 || req2) {
|
||||
struct ceph_mds_session **sessions = NULL;
|
||||
struct ceph_mds_session *s;
|
||||
struct ceph_mds_request *req;
|
||||
unsigned int max;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The mdsc->max_sessions is unlikely to be changed
|
||||
* mostly, here we will retry it by reallocating the
|
||||
* sessions arrary memory to get rid of the mdsc->mutex
|
||||
* lock.
|
||||
*/
|
||||
retry:
|
||||
max = mdsc->max_sessions;
|
||||
sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO);
|
||||
if (!sessions)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock(&ci->i_unsafe_lock);
|
||||
if (req1) {
|
||||
list_for_each_entry(req, &ci->i_unsafe_dirops,
|
||||
r_unsafe_dir_item) {
|
||||
s = req->r_session;
|
||||
if (unlikely(s->s_mds > max)) {
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
goto retry;
|
||||
}
|
||||
if (!sessions[s->s_mds]) {
|
||||
s = ceph_get_mds_session(s);
|
||||
sessions[s->s_mds] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (req2) {
|
||||
list_for_each_entry(req, &ci->i_unsafe_iops,
|
||||
r_unsafe_target_item) {
|
||||
s = req->r_session;
|
||||
if (unlikely(s->s_mds > max)) {
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
goto retry;
|
||||
}
|
||||
if (!sessions[s->s_mds]) {
|
||||
s = ceph_get_mds_session(s);
|
||||
sessions[s->s_mds] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&ci->i_unsafe_lock);
|
||||
|
||||
/* the auth MDS */
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
if (ci->i_auth_cap) {
|
||||
s = ci->i_auth_cap->session;
|
||||
if (!sessions[s->s_mds])
|
||||
sessions[s->s_mds] = ceph_get_mds_session(s);
|
||||
}
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
/* send flush mdlog request to MDSes */
|
||||
for (i = 0; i < max; i++) {
|
||||
s = sessions[i];
|
||||
if (s) {
|
||||
send_flush_mdlog(s);
|
||||
ceph_put_mds_session(s);
|
||||
}
|
||||
}
|
||||
kfree(sessions);
|
||||
}
|
||||
|
||||
dout("unsafe_request_wait %p wait on tid %llu %llu\n",
|
||||
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
|
||||
if (req1) {
|
||||
@ -3008,7 +3079,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
|
||||
}
|
||||
/* see comment in __ceph_remove_cap() */
|
||||
if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
|
||||
drop_inode_snap_realm(ci);
|
||||
ceph_change_snap_realm(inode, NULL);
|
||||
}
|
||||
}
|
||||
if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
|
||||
@ -3114,7 +3185,16 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
||||
break;
|
||||
}
|
||||
}
|
||||
BUG_ON(!found);
|
||||
|
||||
if (!found) {
|
||||
/*
|
||||
* The capsnap should already be removed when removing
|
||||
* auth cap in the case of a forced unmount.
|
||||
*/
|
||||
WARN_ON_ONCE(ci->i_auth_cap);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
capsnap->dirty_pages -= nr;
|
||||
if (capsnap->dirty_pages == 0) {
|
||||
complete_capsnap = true;
|
||||
@ -3136,6 +3216,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
||||
complete_capsnap ? " (complete capsnap)" : "");
|
||||
}
|
||||
|
||||
unlock:
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
if (last) {
|
||||
@ -3606,6 +3687,43 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
|
||||
bool *wake_ci, bool *wake_mdsc)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
|
||||
bool ret;
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
|
||||
|
||||
list_del_init(&capsnap->ci_item);
|
||||
ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
|
||||
if (wake_ci)
|
||||
*wake_ci = ret;
|
||||
|
||||
spin_lock(&mdsc->cap_dirty_lock);
|
||||
if (list_empty(&ci->i_cap_flush_list))
|
||||
list_del_init(&ci->i_flushing_item);
|
||||
|
||||
ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
|
||||
if (wake_mdsc)
|
||||
*wake_mdsc = ret;
|
||||
spin_unlock(&mdsc->cap_dirty_lock);
|
||||
}
|
||||
|
||||
void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
|
||||
bool *wake_ci, bool *wake_mdsc)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
|
||||
__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can
|
||||
* throw away our cap_snap.
|
||||
@ -3643,23 +3761,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
||||
capsnap, capsnap->follows);
|
||||
}
|
||||
}
|
||||
if (flushed) {
|
||||
WARN_ON(capsnap->dirty_pages || capsnap->writing);
|
||||
dout(" removing %p cap_snap %p follows %lld\n",
|
||||
inode, capsnap, follows);
|
||||
list_del(&capsnap->ci_item);
|
||||
wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
|
||||
|
||||
spin_lock(&mdsc->cap_dirty_lock);
|
||||
|
||||
if (list_empty(&ci->i_cap_flush_list))
|
||||
list_del_init(&ci->i_flushing_item);
|
||||
|
||||
wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
|
||||
&capsnap->cap_flush);
|
||||
spin_unlock(&mdsc->cap_dirty_lock);
|
||||
}
|
||||
if (flushed)
|
||||
ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
if (flushed) {
|
||||
ceph_put_snap_context(capsnap->context);
|
||||
ceph_put_cap_snap(capsnap);
|
||||
@ -3743,7 +3848,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
|
||||
goto out_unlock;
|
||||
|
||||
if (target < 0) {
|
||||
__ceph_remove_cap(cap, false);
|
||||
ceph_remove_cap(cap, false);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@ -3778,7 +3883,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
|
||||
change_auth_cap_ses(ci, tcap->session);
|
||||
}
|
||||
}
|
||||
__ceph_remove_cap(cap, false);
|
||||
ceph_remove_cap(cap, false);
|
||||
goto out_unlock;
|
||||
} else if (tsession) {
|
||||
/* add placeholder for the export tagert */
|
||||
@ -3795,7 +3900,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
|
||||
spin_unlock(&mdsc->cap_dirty_lock);
|
||||
}
|
||||
|
||||
__ceph_remove_cap(cap, false);
|
||||
ceph_remove_cap(cap, false);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@ -3906,7 +4011,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
|
||||
ocap->mseq, mds, le32_to_cpu(ph->seq),
|
||||
le32_to_cpu(ph->mseq));
|
||||
}
|
||||
__ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
|
||||
ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
|
||||
}
|
||||
|
||||
*old_issued = issued;
|
||||
@ -4134,8 +4239,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
||||
done:
|
||||
mutex_unlock(&session->s_mutex);
|
||||
done_unlocked:
|
||||
ceph_put_string(extra_info.pool_ns);
|
||||
iput(inode);
|
||||
out:
|
||||
ceph_put_string(extra_info.pool_ns);
|
||||
return;
|
||||
|
||||
flush_cap_releases:
|
||||
@ -4150,7 +4256,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
|
||||
bad:
|
||||
pr_err("ceph_handle_caps: corrupt message\n");
|
||||
ceph_msg_dump(msg);
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4225,33 +4331,9 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
|
||||
dout("flush_dirty_caps done\n");
|
||||
}
|
||||
|
||||
static void iterate_sessions(struct ceph_mds_client *mdsc,
|
||||
void (*cb)(struct ceph_mds_session *))
|
||||
{
|
||||
int mds;
|
||||
|
||||
mutex_lock(&mdsc->mutex);
|
||||
for (mds = 0; mds < mdsc->max_sessions; ++mds) {
|
||||
struct ceph_mds_session *s;
|
||||
|
||||
if (!mdsc->sessions[mds])
|
||||
continue;
|
||||
|
||||
s = ceph_get_mds_session(mdsc->sessions[mds]);
|
||||
if (!s)
|
||||
continue;
|
||||
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
cb(s);
|
||||
ceph_put_mds_session(s);
|
||||
mutex_lock(&mdsc->mutex);
|
||||
}
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
}
|
||||
|
||||
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
|
||||
{
|
||||
iterate_sessions(mdsc, flush_dirty_session_caps);
|
||||
ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
|
||||
}
|
||||
|
||||
void __ceph_touch_fmode(struct ceph_inode_info *ci,
|
||||
|
@ -1722,22 +1722,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = file_remove_privs(file);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = file_update_time(file);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
inode_inc_iversion_raw(inode);
|
||||
|
||||
if (ci->i_inline_version != CEPH_INLINE_NONE) {
|
||||
err = ceph_uninline_data(file, NULL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
down_read(&osdc->lock);
|
||||
map_flags = osdc->osdmap->flags;
|
||||
pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
|
||||
@ -1748,6 +1732,16 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = file_remove_privs(file);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (ci->i_inline_version != CEPH_INLINE_NONE) {
|
||||
err = ceph_uninline_data(file, NULL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
|
||||
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
|
||||
if (fi->fmode & CEPH_FILE_MODE_LAZY)
|
||||
@ -1759,6 +1753,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
err = file_update_time(file);
|
||||
if (err)
|
||||
goto out_caps;
|
||||
|
||||
inode_inc_iversion_raw(inode);
|
||||
|
||||
dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
|
||||
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
|
||||
|
||||
@ -1842,6 +1842,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
}
|
||||
|
||||
goto out_unlocked;
|
||||
out_caps:
|
||||
ceph_put_cap_refs(ci, got);
|
||||
out:
|
||||
if (direct_lock)
|
||||
ceph_end_io_direct(inode);
|
||||
|
@ -581,16 +581,9 @@ void ceph_evict_inode(struct inode *inode)
|
||||
*/
|
||||
if (ci->i_snap_realm) {
|
||||
if (ceph_snap(inode) == CEPH_NOSNAP) {
|
||||
struct ceph_snap_realm *realm = ci->i_snap_realm;
|
||||
dout(" dropping residual ref to snap realm %p\n",
|
||||
realm);
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
ci->i_snap_realm = NULL;
|
||||
if (realm->ino == ci->i_vino.ino)
|
||||
realm->inode = NULL;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
ceph_put_snap_realm(mdsc, realm);
|
||||
ci->i_snap_realm);
|
||||
ceph_change_snap_realm(inode, NULL);
|
||||
} else {
|
||||
ceph_put_snapid_map(mdsc, ci->i_snapid_map);
|
||||
ci->i_snap_realm = NULL;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/bits.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#include "super.h"
|
||||
#include "mds_client.h"
|
||||
@ -652,14 +653,9 @@ const char *ceph_session_state_name(int s)
|
||||
|
||||
struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s)
|
||||
{
|
||||
if (refcount_inc_not_zero(&s->s_ref)) {
|
||||
dout("mdsc get_session %p %d -> %d\n", s,
|
||||
refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
|
||||
if (refcount_inc_not_zero(&s->s_ref))
|
||||
return s;
|
||||
} else {
|
||||
dout("mdsc get_session %p 0 -- FAIL\n", s);
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void ceph_put_mds_session(struct ceph_mds_session *s)
|
||||
@ -667,8 +663,6 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
|
||||
if (IS_ERR_OR_NULL(s))
|
||||
return;
|
||||
|
||||
dout("mdsc put_session %p %d -> %d\n", s,
|
||||
refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
|
||||
if (refcount_dec_and_test(&s->s_ref)) {
|
||||
if (s->s_auth.authorizer)
|
||||
ceph_auth_destroy_authorizer(s->s_auth.authorizer);
|
||||
@ -743,8 +737,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
|
||||
s->s_mdsc = mdsc;
|
||||
s->s_mds = mds;
|
||||
s->s_state = CEPH_MDS_SESSION_NEW;
|
||||
s->s_ttl = 0;
|
||||
s->s_seq = 0;
|
||||
mutex_init(&s->s_mutex);
|
||||
|
||||
ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
|
||||
@ -753,17 +745,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
|
||||
s->s_cap_ttl = jiffies - 1;
|
||||
|
||||
spin_lock_init(&s->s_cap_lock);
|
||||
s->s_renew_requested = 0;
|
||||
s->s_renew_seq = 0;
|
||||
INIT_LIST_HEAD(&s->s_caps);
|
||||
s->s_nr_caps = 0;
|
||||
refcount_set(&s->s_ref, 1);
|
||||
INIT_LIST_HEAD(&s->s_waiting);
|
||||
INIT_LIST_HEAD(&s->s_unsafe);
|
||||
xa_init(&s->s_delegated_inos);
|
||||
s->s_num_cap_releases = 0;
|
||||
s->s_cap_reconnect = 0;
|
||||
s->s_cap_iterator = NULL;
|
||||
INIT_LIST_HEAD(&s->s_cap_releases);
|
||||
INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work);
|
||||
|
||||
@ -811,6 +797,33 @@ static void put_request_session(struct ceph_mds_request *req)
|
||||
}
|
||||
}
|
||||
|
||||
void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
|
||||
void (*cb)(struct ceph_mds_session *),
|
||||
bool check_state)
|
||||
{
|
||||
int mds;
|
||||
|
||||
mutex_lock(&mdsc->mutex);
|
||||
for (mds = 0; mds < mdsc->max_sessions; ++mds) {
|
||||
struct ceph_mds_session *s;
|
||||
|
||||
s = __ceph_lookup_mds_session(mdsc, mds);
|
||||
if (!s)
|
||||
continue;
|
||||
|
||||
if (check_state && !check_session_state(s)) {
|
||||
ceph_put_mds_session(s);
|
||||
continue;
|
||||
}
|
||||
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
cb(s);
|
||||
ceph_put_mds_session(s);
|
||||
mutex_lock(&mdsc->mutex);
|
||||
}
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
}
|
||||
|
||||
void ceph_mdsc_release_request(struct kref *kref)
|
||||
{
|
||||
struct ceph_mds_request *req = container_of(kref,
|
||||
@ -1155,7 +1168,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
|
||||
/*
|
||||
* session messages
|
||||
*/
|
||||
static struct ceph_msg *create_session_msg(u32 op, u64 seq)
|
||||
struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq)
|
||||
{
|
||||
struct ceph_msg *msg;
|
||||
struct ceph_mds_session_head *h;
|
||||
@ -1163,7 +1176,8 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
|
||||
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
|
||||
false);
|
||||
if (!msg) {
|
||||
pr_err("create_session_msg ENOMEM creating msg\n");
|
||||
pr_err("ENOMEM creating session %s msg\n",
|
||||
ceph_session_op_name(op));
|
||||
return NULL;
|
||||
}
|
||||
h = msg->front.iov_base;
|
||||
@ -1294,7 +1308,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
|
||||
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
|
||||
GFP_NOFS, false);
|
||||
if (!msg) {
|
||||
pr_err("create_session_msg ENOMEM creating msg\n");
|
||||
pr_err("ENOMEM creating session open msg\n");
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
p = msg->front.iov_base;
|
||||
@ -1583,14 +1597,39 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_cap_snap *capsnap;
|
||||
int capsnap_release = 0;
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
|
||||
|
||||
while (!list_empty(&ci->i_cap_snaps)) {
|
||||
capsnap = list_first_entry(&ci->i_cap_snaps,
|
||||
struct ceph_cap_snap, ci_item);
|
||||
__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
|
||||
ceph_put_snap_context(capsnap->context);
|
||||
ceph_put_cap_snap(capsnap);
|
||||
capsnap_release++;
|
||||
}
|
||||
wake_up_all(&ci->i_cap_wq);
|
||||
wake_up_all(&mdsc->cap_flushing_wq);
|
||||
return capsnap_release;
|
||||
}
|
||||
|
||||
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
void *arg)
|
||||
{
|
||||
struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
|
||||
struct ceph_mds_client *mdsc = fsc->mdsc;
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
LIST_HEAD(to_remove);
|
||||
bool dirty_dropped = false;
|
||||
bool invalidate = false;
|
||||
int capsnap_release = 0;
|
||||
|
||||
dout("removing cap %p, ci is %p, inode is %p\n",
|
||||
cap, ci, &ci->vfs_inode);
|
||||
@ -1598,7 +1637,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
__ceph_remove_cap(cap, false);
|
||||
if (!ci->i_auth_cap) {
|
||||
struct ceph_cap_flush *cf;
|
||||
struct ceph_mds_client *mdsc = fsc->mdsc;
|
||||
|
||||
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
|
||||
if (inode->i_data.nrpages > 0)
|
||||
@ -1662,6 +1700,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
|
||||
ci->i_prealloc_cap_flush = NULL;
|
||||
}
|
||||
|
||||
if (!list_empty(&ci->i_cap_snaps))
|
||||
capsnap_release = remove_capsnaps(mdsc, inode);
|
||||
}
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
while (!list_empty(&to_remove)) {
|
||||
@ -1678,6 +1719,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
|
||||
ceph_queue_invalidate(inode);
|
||||
if (dirty_dropped)
|
||||
iput(inode);
|
||||
while (capsnap_release--)
|
||||
iput(inode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1803,8 +1846,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
|
||||
|
||||
dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
|
||||
ceph_mds_state_name(state));
|
||||
msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
|
||||
++session->s_renew_seq);
|
||||
msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
|
||||
++session->s_renew_seq);
|
||||
if (!msg)
|
||||
return -ENOMEM;
|
||||
ceph_con_send(&session->s_con, msg);
|
||||
@ -1818,7 +1861,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
|
||||
|
||||
dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
|
||||
session->s_mds, ceph_session_state_name(session->s_state), seq);
|
||||
msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
|
||||
msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
|
||||
if (!msg)
|
||||
return -ENOMEM;
|
||||
ceph_con_send(&session->s_con, msg);
|
||||
@ -1870,7 +1913,8 @@ static int request_close_session(struct ceph_mds_session *session)
|
||||
dout("request_close_session mds%d state %s seq %lld\n",
|
||||
session->s_mds, ceph_session_state_name(session->s_state),
|
||||
session->s_seq);
|
||||
msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
|
||||
msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE,
|
||||
session->s_seq);
|
||||
if (!msg)
|
||||
return -ENOMEM;
|
||||
ceph_con_send(&session->s_con, msg);
|
||||
@ -1965,7 +2009,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
|
||||
|
||||
if (oissued) {
|
||||
/* we aren't the only cap.. just remove us */
|
||||
__ceph_remove_cap(cap, true);
|
||||
ceph_remove_cap(cap, true);
|
||||
(*remaining)--;
|
||||
} else {
|
||||
struct dentry *dentry;
|
||||
@ -4150,13 +4194,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
||||
struct ceph_mdsmap *newmap,
|
||||
struct ceph_mdsmap *oldmap)
|
||||
{
|
||||
int i;
|
||||
int i, j, err;
|
||||
int oldstate, newstate;
|
||||
struct ceph_mds_session *s;
|
||||
unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0};
|
||||
|
||||
dout("check_new_map new %u old %u\n",
|
||||
newmap->m_epoch, oldmap->m_epoch);
|
||||
|
||||
if (newmap->m_info) {
|
||||
for (i = 0; i < newmap->possible_max_rank; i++) {
|
||||
for (j = 0; j < newmap->m_info[i].num_export_targets; j++)
|
||||
set_bit(newmap->m_info[i].export_targets[j], targets);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
|
||||
if (!mdsc->sessions[i])
|
||||
continue;
|
||||
@ -4210,6 +4262,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
||||
if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
|
||||
newstate >= CEPH_MDS_STATE_RECONNECT) {
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
clear_bit(i, targets);
|
||||
send_mds_reconnect(mdsc, s);
|
||||
mutex_lock(&mdsc->mutex);
|
||||
}
|
||||
@ -4232,6 +4285,51 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Only open and reconnect sessions that don't exist yet.
|
||||
*/
|
||||
for (i = 0; i < newmap->possible_max_rank; i++) {
|
||||
/*
|
||||
* In case the import MDS is crashed just after
|
||||
* the EImportStart journal is flushed, so when
|
||||
* a standby MDS takes over it and is replaying
|
||||
* the EImportStart journal the new MDS daemon
|
||||
* will wait the client to reconnect it, but the
|
||||
* client may never register/open the session yet.
|
||||
*
|
||||
* Will try to reconnect that MDS daemon if the
|
||||
* rank number is in the export targets array and
|
||||
* is the up:reconnect state.
|
||||
*/
|
||||
newstate = ceph_mdsmap_get_state(newmap, i);
|
||||
if (!test_bit(i, targets) || newstate != CEPH_MDS_STATE_RECONNECT)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The session maybe registered and opened by some
|
||||
* requests which were choosing random MDSes during
|
||||
* the mdsc->mutex's unlock/lock gap below in rare
|
||||
* case. But the related MDS daemon will just queue
|
||||
* that requests and be still waiting for the client's
|
||||
* reconnection request in up:reconnect state.
|
||||
*/
|
||||
s = __ceph_lookup_mds_session(mdsc, i);
|
||||
if (likely(!s)) {
|
||||
s = __open_export_target_session(mdsc, i);
|
||||
if (IS_ERR(s)) {
|
||||
err = PTR_ERR(s);
|
||||
pr_err("failed to open export target session, err %d\n",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
dout("send reconnect to export target mds.%d\n", i);
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
send_mds_reconnect(mdsc, s);
|
||||
ceph_put_mds_session(s);
|
||||
mutex_lock(&mdsc->mutex);
|
||||
}
|
||||
|
||||
for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
|
||||
s = mdsc->sessions[i];
|
||||
if (!s)
|
||||
@ -4409,24 +4507,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
|
||||
}
|
||||
|
||||
/*
|
||||
* lock unlock sessions, to wait ongoing session activities
|
||||
* lock unlock the session, to wait ongoing session activities
|
||||
*/
|
||||
static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
|
||||
static void lock_unlock_session(struct ceph_mds_session *s)
|
||||
{
|
||||
int i;
|
||||
|
||||
mutex_lock(&mdsc->mutex);
|
||||
for (i = 0; i < mdsc->max_sessions; i++) {
|
||||
struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
|
||||
if (!s)
|
||||
continue;
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
mutex_lock(&s->s_mutex);
|
||||
mutex_unlock(&s->s_mutex);
|
||||
ceph_put_mds_session(s);
|
||||
mutex_lock(&mdsc->mutex);
|
||||
}
|
||||
mutex_unlock(&mdsc->mutex);
|
||||
mutex_lock(&s->s_mutex);
|
||||
mutex_unlock(&s->s_mutex);
|
||||
}
|
||||
|
||||
static void maybe_recover_session(struct ceph_mds_client *mdsc)
|
||||
@ -4448,6 +4534,8 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
|
||||
|
||||
bool check_session_state(struct ceph_mds_session *s)
|
||||
{
|
||||
struct ceph_fs_client *fsc = s->s_mdsc->fsc;
|
||||
|
||||
switch (s->s_state) {
|
||||
case CEPH_MDS_SESSION_OPEN:
|
||||
if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
|
||||
@ -4456,8 +4544,9 @@ bool check_session_state(struct ceph_mds_session *s)
|
||||
}
|
||||
break;
|
||||
case CEPH_MDS_SESSION_CLOSING:
|
||||
/* Should never reach this when we're unmounting */
|
||||
WARN_ON_ONCE(s->s_ttl);
|
||||
/* Should never reach this when not force unmounting */
|
||||
WARN_ON_ONCE(s->s_ttl &&
|
||||
READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
|
||||
fallthrough;
|
||||
case CEPH_MDS_SESSION_NEW:
|
||||
case CEPH_MDS_SESSION_RESTARTING:
|
||||
@ -4584,21 +4673,12 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
|
||||
init_completion(&mdsc->safe_umount_waiters);
|
||||
init_waitqueue_head(&mdsc->session_close_wq);
|
||||
INIT_LIST_HEAD(&mdsc->waiting_for_map);
|
||||
mdsc->sessions = NULL;
|
||||
atomic_set(&mdsc->num_sessions, 0);
|
||||
mdsc->max_sessions = 0;
|
||||
mdsc->stopping = 0;
|
||||
atomic64_set(&mdsc->quotarealms_count, 0);
|
||||
mdsc->quotarealms_inodes = RB_ROOT;
|
||||
mutex_init(&mdsc->quotarealms_inodes_mutex);
|
||||
mdsc->last_snap_seq = 0;
|
||||
init_rwsem(&mdsc->snap_rwsem);
|
||||
mdsc->snap_realms = RB_ROOT;
|
||||
INIT_LIST_HEAD(&mdsc->snap_empty);
|
||||
mdsc->num_snap_realms = 0;
|
||||
spin_lock_init(&mdsc->snap_empty_lock);
|
||||
mdsc->last_tid = 0;
|
||||
mdsc->oldest_tid = 0;
|
||||
mdsc->request_tree = RB_ROOT;
|
||||
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
|
||||
mdsc->last_renew_caps = jiffies;
|
||||
@ -4610,11 +4690,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
|
||||
mdsc->last_cap_flush_tid = 1;
|
||||
INIT_LIST_HEAD(&mdsc->cap_flush_list);
|
||||
INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
|
||||
mdsc->num_cap_flushing = 0;
|
||||
spin_lock_init(&mdsc->cap_dirty_lock);
|
||||
init_waitqueue_head(&mdsc->cap_flushing_wq);
|
||||
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
|
||||
atomic_set(&mdsc->cap_reclaim_pending, 0);
|
||||
err = ceph_metric_init(&mdsc->metric);
|
||||
if (err)
|
||||
goto err_mdsmap;
|
||||
@ -4676,6 +4754,30 @@ static void wait_requests(struct ceph_mds_client *mdsc)
|
||||
dout("wait_requests done\n");
|
||||
}
|
||||
|
||||
void send_flush_mdlog(struct ceph_mds_session *s)
|
||||
{
|
||||
struct ceph_msg *msg;
|
||||
|
||||
/*
|
||||
* Pre-luminous MDS crashes when it sees an unknown session request
|
||||
*/
|
||||
if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS))
|
||||
return;
|
||||
|
||||
mutex_lock(&s->s_mutex);
|
||||
dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds,
|
||||
ceph_session_state_name(s->s_state), s->s_seq);
|
||||
msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG,
|
||||
s->s_seq);
|
||||
if (!msg) {
|
||||
pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n",
|
||||
s->s_mds, ceph_session_state_name(s->s_state), s->s_seq);
|
||||
} else {
|
||||
ceph_con_send(&s->s_con, msg);
|
||||
}
|
||||
mutex_unlock(&s->s_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* called before mount is ro, and before dentries are torn down.
|
||||
* (hmm, does this still race with new lookups?)
|
||||
@ -4685,7 +4787,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
|
||||
dout("pre_umount\n");
|
||||
mdsc->stopping = 1;
|
||||
|
||||
lock_unlock_sessions(mdsc);
|
||||
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
|
||||
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
|
||||
ceph_flush_dirty_caps(mdsc);
|
||||
wait_requests(mdsc);
|
||||
|
||||
@ -4912,7 +5015,6 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
|
||||
|
||||
ceph_metric_destroy(&mdsc->metric);
|
||||
|
||||
flush_delayed_work(&mdsc->metric.delayed_work);
|
||||
fsc->mdsc = NULL;
|
||||
kfree(mdsc);
|
||||
dout("mdsc_destroy %p done\n", mdsc);
|
||||
|
@ -522,6 +522,11 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
|
||||
kref_put(&req->r_kref, ceph_mdsc_release_request);
|
||||
}
|
||||
|
||||
extern void send_flush_mdlog(struct ceph_mds_session *s);
|
||||
extern void ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc,
|
||||
void (*cb)(struct ceph_mds_session *),
|
||||
bool check_state);
|
||||
extern struct ceph_msg *ceph_create_session_msg(u32 op, u64 seq);
|
||||
extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
|
||||
struct ceph_cap *cap);
|
||||
extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
|
||||
|
@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
|
||||
int err;
|
||||
u8 mdsmap_v;
|
||||
u16 mdsmap_ev;
|
||||
u32 target;
|
||||
|
||||
m = kzalloc(sizeof(*m), GFP_NOFS);
|
||||
if (!m)
|
||||
@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
|
||||
sizeof(u32), GFP_NOFS);
|
||||
if (!info->export_targets)
|
||||
goto nomem;
|
||||
for (j = 0; j < num_export_targets; j++)
|
||||
info->export_targets[j] =
|
||||
ceph_decode_32(&pexport_targets);
|
||||
for (j = 0; j < num_export_targets; j++) {
|
||||
target = ceph_decode_32(&pexport_targets);
|
||||
if (target >= m->possible_max_rank) {
|
||||
err = -EIO;
|
||||
goto corrupt;
|
||||
}
|
||||
info->export_targets[j] = target;
|
||||
}
|
||||
} else {
|
||||
info->export_targets = NULL;
|
||||
}
|
||||
|
@ -302,6 +302,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
|
||||
if (!m)
|
||||
return;
|
||||
|
||||
cancel_delayed_work_sync(&m->delayed_work);
|
||||
|
||||
percpu_counter_destroy(&m->total_inodes);
|
||||
percpu_counter_destroy(&m->opened_inodes);
|
||||
percpu_counter_destroy(&m->i_caps_mis);
|
||||
@ -309,8 +311,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
|
||||
percpu_counter_destroy(&m->d_lease_mis);
|
||||
percpu_counter_destroy(&m->d_lease_hit);
|
||||
|
||||
cancel_delayed_work_sync(&m->delayed_work);
|
||||
|
||||
ceph_put_mds_session(m->session);
|
||||
}
|
||||
|
||||
|
@ -849,6 +849,43 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
|
||||
dout("flush_snaps done\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* ceph_change_snap_realm - change the snap_realm for an inode
|
||||
* @inode: inode to move to new snap realm
|
||||
* @realm: new realm to move inode into (may be NULL)
|
||||
*
|
||||
* Detach an inode from its old snaprealm (if any) and attach it to
|
||||
* the new snaprealm (if any). The old snap realm reference held by
|
||||
* the inode is put. If realm is non-NULL, then the caller's reference
|
||||
* to it is taken over by the inode.
|
||||
*/
|
||||
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm)
|
||||
{
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
|
||||
struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
|
||||
|
||||
lockdep_assert_held(&ci->i_ceph_lock);
|
||||
|
||||
if (oldrealm) {
|
||||
spin_lock(&oldrealm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
if (oldrealm->ino == ci->i_vino.ino)
|
||||
oldrealm->inode = NULL;
|
||||
spin_unlock(&oldrealm->inodes_with_caps_lock);
|
||||
ceph_put_snap_realm(mdsc, oldrealm);
|
||||
}
|
||||
|
||||
ci->i_snap_realm = realm;
|
||||
|
||||
if (realm) {
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps);
|
||||
if (realm->ino == ci->i_vino.ino)
|
||||
realm->inode = inode;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a snap notification from the MDS.
|
||||
@ -935,7 +972,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
||||
};
|
||||
struct inode *inode = ceph_find_inode(sb, vino);
|
||||
struct ceph_inode_info *ci;
|
||||
struct ceph_snap_realm *oldrealm;
|
||||
|
||||
if (!inode)
|
||||
continue;
|
||||
@ -960,27 +996,10 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
||||
}
|
||||
dout(" will move %p to split realm %llx %p\n",
|
||||
inode, realm->ino, realm);
|
||||
/*
|
||||
* Move the inode to the new realm
|
||||
*/
|
||||
oldrealm = ci->i_snap_realm;
|
||||
spin_lock(&oldrealm->inodes_with_caps_lock);
|
||||
list_del_init(&ci->i_snap_realm_item);
|
||||
spin_unlock(&oldrealm->inodes_with_caps_lock);
|
||||
|
||||
spin_lock(&realm->inodes_with_caps_lock);
|
||||
list_add(&ci->i_snap_realm_item,
|
||||
&realm->inodes_with_caps);
|
||||
ci->i_snap_realm = realm;
|
||||
if (realm->ino == ci->i_vino.ino)
|
||||
realm->inode = inode;
|
||||
spin_unlock(&realm->inodes_with_caps_lock);
|
||||
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
ceph_get_snap_realm(mdsc, realm);
|
||||
ceph_put_snap_realm(mdsc, oldrealm);
|
||||
|
||||
ceph_change_snap_realm(inode, realm);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
iput(inode);
|
||||
continue;
|
||||
|
||||
|
@ -46,6 +46,7 @@ const char *ceph_session_op_name(int op)
|
||||
case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
|
||||
case CEPH_SESSION_FORCE_RO: return "force_ro";
|
||||
case CEPH_SESSION_REJECT: return "reject";
|
||||
case CEPH_SESSION_REQUEST_FLUSH_MDLOG: return "flush_mdlog";
|
||||
}
|
||||
return "???";
|
||||
}
|
||||
|
@ -418,7 +418,6 @@ struct ceph_inode_info {
|
||||
struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
|
||||
struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
|
||||
};
|
||||
int i_snap_realm_counter; /* snap realm (if caps) */
|
||||
struct list_head i_snap_realm_item;
|
||||
struct list_head i_snap_flush_item;
|
||||
struct timespec64 i_btime;
|
||||
@ -929,6 +928,7 @@ extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
|
||||
extern int ceph_update_snap_trace(struct ceph_mds_client *m,
|
||||
void *p, void *e, bool deletion,
|
||||
struct ceph_snap_realm **realm_ret);
|
||||
void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm);
|
||||
extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
|
||||
struct ceph_mds_session *session,
|
||||
struct ceph_msg *msg);
|
||||
@ -1138,6 +1138,7 @@ extern void ceph_add_cap(struct inode *inode,
|
||||
unsigned cap, unsigned seq, u64 realmino, int flags,
|
||||
struct ceph_cap **new_cap);
|
||||
extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
|
||||
extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
|
||||
extern void __ceph_remove_caps(struct ceph_inode_info *ci);
|
||||
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
|
||||
struct ceph_cap *cap);
|
||||
@ -1163,6 +1164,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
|
||||
int had);
|
||||
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
||||
struct ceph_snap_context *snapc);
|
||||
extern void __ceph_remove_capsnap(struct inode *inode,
|
||||
struct ceph_cap_snap *capsnap,
|
||||
bool *wake_ci, bool *wake_mdsc);
|
||||
extern void ceph_remove_capsnap(struct inode *inode,
|
||||
struct ceph_cap_snap *capsnap,
|
||||
bool *wake_ci, bool *wake_mdsc);
|
||||
extern void ceph_flush_snaps(struct ceph_inode_info *ci,
|
||||
struct ceph_mds_session **psession);
|
||||
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
|
||||
|
@ -340,6 +340,18 @@ static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
|
||||
ceph_cap_string(issued), issued);
|
||||
}
|
||||
|
||||
static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
|
||||
char *val, size_t size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
ret = ceph_fmt_xattr(val, size, "%d",
|
||||
ci->i_auth_cap ? ci->i_auth_cap->session->s_mds : -1);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
|
||||
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
|
||||
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
|
||||
@ -473,6 +485,13 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
|
||||
.exists_cb = NULL,
|
||||
.flags = VXATTR_FLAG_READONLY,
|
||||
},
|
||||
{
|
||||
.name = "ceph.auth_mds",
|
||||
.name_size = sizeof("ceph.auth_mds"),
|
||||
.getxattr_cb = ceph_vxattrcb_auth_mds,
|
||||
.exists_cb = NULL,
|
||||
.flags = VXATTR_FLAG_READONLY,
|
||||
},
|
||||
{ .name = NULL, 0 } /* Required table terminator */
|
||||
};
|
||||
|
||||
|
@ -299,6 +299,7 @@ enum {
|
||||
CEPH_SESSION_FLUSHMSG_ACK,
|
||||
CEPH_SESSION_FORCE_RO,
|
||||
CEPH_SESSION_REJECT,
|
||||
CEPH_SESSION_REQUEST_FLUSH_MDLOG,
|
||||
};
|
||||
|
||||
extern const char *ceph_session_op_name(int op);
|
||||
|
Loading…
Reference in New Issue
Block a user