mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-10 07:10:27 +00:00
Two patches to improve RBD exclusive lock interaction with
osd_request_timeout option and another fix to reduce the potential for
erroneous blocklisting -- this time in CephFS. All going to stable.
-----BEGIN PGP SIGNATURE-----

iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmTNFFUTHGlkcnlvbW92
QGdtYWlsLmNvbQAKCRBKf944AhHzi5I8B/9a8C5ed0XfTadHcHX5VQsY3b//4rgp
0VYkQbjYnSCwrYRIPsvnL8LeLHzbcPGLpFAQXg7uUlmJ5dpaOz303hKmKt5GdyOR
qvWka3K4zeG177b6yc1srqs0cEsCLpQrn+krnvOl5v87QdFsCP/bsJMOrJ9mlhdM
9GjkjDRn6jvNyOLGbn3kIvwCRF9NH6/nHzjBcTUzvS8fBUye02o9C1H6ZQ7sYjKH
sJnmQCNCFHEqdaVjDZ7mw/doIrAbmTV6sgusuPjiF5bHILzX4oWG4UJmRpHFV//S
JPQgMp2DNjP8tW9aCVLVVVV5t5AKBr84etF59DaFNflk27U3COJWkE0a
=gw7n
-----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.5-rc5' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "Two patches to improve RBD exclusive lock interaction with
  osd_request_timeout option and another fix to reduce the potential for
  erroneous blocklisting -- this time in CephFS. All going to stable"

* tag 'ceph-for-6.5-rc5' of https://github.com/ceph/ceph-client:
  libceph: fix potential hang in ceph_osdc_notify()
  rbd: prevent busy loop when requesting exclusive lock
  ceph: defer stopping mdsc delayed_work
This commit is contained in:
commit
4593f3c2c6
@ -3675,7 +3675,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
|
||||
ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
|
||||
RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
|
||||
RBD_LOCK_TAG, "", 0);
|
||||
if (ret)
|
||||
if (ret && ret != -EEXIST)
|
||||
return ret;
|
||||
|
||||
__rbd_lock(rbd_dev, cookie);
|
||||
@ -3878,7 +3878,7 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
|
||||
&rbd_dev->header_oloc, RBD_LOCK_NAME,
|
||||
&lock_type, &lock_tag, &lockers, &num_lockers);
|
||||
if (ret) {
|
||||
rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
|
||||
rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -3940,8 +3940,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
|
||||
ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
|
||||
&rbd_dev->header_oloc, &watchers,
|
||||
&num_watchers);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
|
||||
for (i = 0; i < num_watchers; i++) {
|
||||
@ -3985,8 +3987,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
|
||||
locker = refreshed_locker = NULL;
|
||||
|
||||
ret = rbd_lock(rbd_dev);
|
||||
if (ret != -EBUSY)
|
||||
if (!ret)
|
||||
goto out;
|
||||
if (ret != -EBUSY) {
|
||||
rbd_warn(rbd_dev, "failed to lock header: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* determine if the current lock holder is still alive */
|
||||
locker = get_lock_owner_info(rbd_dev);
|
||||
@ -4089,11 +4095,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
|
||||
|
||||
ret = rbd_try_lock(rbd_dev);
|
||||
if (ret < 0) {
|
||||
rbd_warn(rbd_dev, "failed to lock header: %d", ret);
|
||||
if (ret == -EBLOCKLISTED)
|
||||
goto out;
|
||||
|
||||
ret = 1; /* request lock anyway */
|
||||
rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
if (ret > 0) {
|
||||
up_write(&rbd_dev->lock_rwsem);
|
||||
@ -6627,12 +6630,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
|
||||
cancel_delayed_work_sync(&rbd_dev->lock_dwork);
|
||||
if (!ret)
|
||||
ret = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
|
||||
return ret;
|
||||
rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
|
||||
}
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* The lock may have been released by now, unless automatic lock
|
||||
|
@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
|
||||
|
||||
dout("mdsc delayed_work\n");
|
||||
|
||||
if (mdsc->stopping)
|
||||
if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
|
||||
return;
|
||||
|
||||
mutex_lock(&mdsc->mutex);
|
||||
@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
|
||||
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
|
||||
{
|
||||
dout("pre_umount\n");
|
||||
mdsc->stopping = 1;
|
||||
mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
|
||||
|
||||
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
|
||||
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
|
||||
|
@ -380,6 +380,11 @@ struct cap_wait {
|
||||
int want;
|
||||
};
|
||||
|
||||
enum {
|
||||
CEPH_MDSC_STOPPING_BEGIN = 1,
|
||||
CEPH_MDSC_STOPPING_FLUSHED = 2,
|
||||
};
|
||||
|
||||
/*
|
||||
* mds client state
|
||||
*/
|
||||
|
@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
|
||||
ceph_mdsc_pre_umount(fsc->mdsc);
|
||||
flush_fs_workqueues(fsc);
|
||||
|
||||
/*
|
||||
* Though the kill_anon_super() will finally trigger the
|
||||
* sync_filesystem() anyway, we still need to do it here
|
||||
* and then bump the stage of shutdown to stop the work
|
||||
* queue as early as possible.
|
||||
*/
|
||||
sync_filesystem(s);
|
||||
|
||||
fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
|
||||
|
||||
kill_anon_super(s);
|
||||
|
||||
fsc->client->extra_mon_dispatch = NULL;
|
||||
|
@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
|
||||
int ret;
|
||||
|
||||
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
|
||||
ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
|
||||
ret = wait_for_completion_killable(&lreq->reg_commit_wait);
|
||||
return ret ?: lreq->reg_commit_error;
|
||||
}
|
||||
|
||||
static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
|
||||
static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
|
||||
unsigned long timeout)
|
||||
{
|
||||
int ret;
|
||||
long left;
|
||||
|
||||
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
|
||||
ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
|
||||
return ret ?: lreq->notify_finish_error;
|
||||
left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
|
||||
ceph_timeout_jiffies(timeout));
|
||||
if (left <= 0)
|
||||
left = left ?: -ETIMEDOUT;
|
||||
else
|
||||
left = lreq->notify_finish_error; /* completed */
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
|
||||
linger_submit(lreq);
|
||||
ret = linger_reg_commit_wait(lreq);
|
||||
if (!ret)
|
||||
ret = linger_notify_finish_wait(lreq);
|
||||
ret = linger_notify_finish_wait(lreq,
|
||||
msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
|
||||
else
|
||||
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user