2018-06-05 19:42:14 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2014-07-15 08:07:01 +10:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2014 Red Hat, Inc.
|
|
|
|
* All Rights Reserved.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "xfs.h"
|
2016-05-18 10:58:51 +10:00
|
|
|
#include "xfs_shared.h"
|
2016-03-15 11:42:44 +11:00
|
|
|
#include "xfs_format.h"
|
2014-07-15 08:07:29 +10:00
|
|
|
#include "xfs_log_format.h"
|
2016-03-15 11:42:44 +11:00
|
|
|
#include "xfs_trans_resv.h"
|
2016-05-18 10:58:51 +10:00
|
|
|
#include "xfs_sysfs.h"
|
xfs: AIL needs asynchronous CIL forcing
The AIL pushing is stalling on log forces when it comes across
pinned items. This is happening on removal workloads where the AIL
is dominated by stale items that are removed from AIL when the
checkpoint that marks the items stale is committed to the journal.
This results in relatively few items in the AIL, but those that are
there are often pinned, as the directories that items are being removed
from are still being logged.
As a result, many push cycles through the CIL will first issue a
blocking log force to unpin the items. This can take some time to
complete, with tracing regularly showing push delays of half a
second and sometimes up into the range of several seconds. Sequences
like this aren't uncommon:
....
399.829437: xfsaild: last lsn 0x11002dd000 count 101 stuck 101 flushing 0 tout 20
<wanted 20ms, got 270ms delay>
400.099622: xfsaild: target 0x11002f3600, prev 0x11002f3600, last lsn 0x0
400.099623: xfsaild: first lsn 0x11002f3600
400.099679: xfsaild: last lsn 0x1100305000 count 16 stuck 11 flushing 0 tout 50
<wanted 50ms, got 500ms delay>
400.589348: xfsaild: target 0x110032e600, prev 0x11002f3600, last lsn 0x0
400.589349: xfsaild: first lsn 0x1100305000
400.589595: xfsaild: last lsn 0x110032e600 count 156 stuck 101 flushing 30 tout 50
<wanted 50ms, got 460ms delay>
400.950341: xfsaild: target 0x1100353000, prev 0x110032e600, last lsn 0x0
400.950343: xfsaild: first lsn 0x1100317c00
400.950436: xfsaild: last lsn 0x110033d200 count 105 stuck 101 flushing 0 tout 20
<wanted 20ms, got 200ms delay>
401.142333: xfsaild: target 0x1100361600, prev 0x1100353000, last lsn 0x0
401.142334: xfsaild: first lsn 0x110032e600
401.142535: xfsaild: last lsn 0x1100353000 count 122 stuck 101 flushing 8 tout 10
<wanted 10ms, got 10ms delay>
401.154323: xfsaild: target 0x1100361600, prev 0x1100361600, last lsn 0x1100353000
401.154328: xfsaild: first lsn 0x1100353000
401.154389: xfsaild: last lsn 0x1100353000 count 101 stuck 101 flushing 0 tout 20
<wanted 20ms, got 300ms delay>
401.451525: xfsaild: target 0x1100361600, prev 0x1100361600, last lsn 0x0
401.451526: xfsaild: first lsn 0x1100353000
401.451804: xfsaild: last lsn 0x1100377200 count 170 stuck 22 flushing 122 tout 50
<wanted 50ms, got 500ms delay>
401.933581: xfsaild: target 0x1100361600, prev 0x1100361600, last lsn 0x0
....
In each of these cases, every AIL pass saw 101 log items stuck on
the AIL (pinned) with very few other items being found. Each pass, a
log force was issued, and delay between last/first is the sleep time
+ the sync log force time.
Some of these 101 items pinned the tail of the log. The tail of the
log does slowly creep forward (first lsn), but the problem is that
the log is actually out of reservation space because it's been
running so many transactions that produce stale items that never reach
the AIL but still consume log space. Hence we have a largely empty AIL, with
long term pins on items that pin the tail of the log that don't get
pushed frequently enough to keep log space available.
The problem is the hundreds of milliseconds that we block in the log
force pushing the CIL out to disk. The AIL should not be stalled
like this - it needs to run and flush items that are at the tail of
the log with minimal latency. What we really need to do is trigger a
log flush, but then not wait for it at all - we've already done our
waiting for stuff to complete when we backed off prior to the log
force being issued.
Even if we remove the XFS_LOG_SYNC from the xfs_log_force() call, we
still do a blocking flush of the CIL and that is what is causing the
issue. Hence we need a new interface for the CIL to trigger an
immediate background push of the CIL to get it moving faster but not
to wait on that to occur. While the CIL is pushing, the AIL can also
be pushing.
We already have an internal interface to do this -
xlog_cil_push_now() - but we need a wrapper for it to be used
externally. xlog_cil_force_seq() can easily be extended to do what
we need as it already implements the synchronous CIL push via
xlog_cil_push_now(). Add the necessary flags and "push current
sequence" semantics to xlog_cil_force_seq() and convert the AIL
pushing to use it.
One of the complexities here is that the CIL push does not guarantee
that the commit record for the CIL checkpoint is written to disk.
The current log force ensures this by submitting the current ACTIVE
iclog that the commit record was written to. We need the CIL to
actually write this commit record to disk for an async push to
ensure that the checkpoint actually makes it to disk and unpins the
pinned items in the checkpoint on completion. Hence we need to pass
down to the CIL push that we are doing an async flush so that it can
switch out the commit_iclog if necessary to get written to disk when
the commit iclog is finally released.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2021-08-10 18:00:44 -07:00
|
|
|
#include "xfs_log.h"
|
2014-07-15 08:07:29 +10:00
|
|
|
#include "xfs_log_priv.h"
|
2016-03-15 11:42:44 +11:00
|
|
|
#include "xfs_mount.h"
|
2014-07-15 08:07:01 +10:00
|
|
|
|
|
|
|
struct xfs_sysfs_attr {
|
|
|
|
struct attribute attr;
|
2015-10-12 05:18:45 +11:00
|
|
|
ssize_t (*show)(struct kobject *kobject, char *buf);
|
|
|
|
ssize_t (*store)(struct kobject *kobject, const char *buf,
|
|
|
|
size_t count);
|
2014-07-15 08:07:01 +10:00
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct xfs_sysfs_attr *
|
|
|
|
to_attr(struct attribute *attr)
|
|
|
|
{
|
|
|
|
return container_of(attr, struct xfs_sysfs_attr, attr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Declare a static struct xfs_sysfs_attr called xfs_sysfs_attr_<name>,
 * initialised with the generic __ATTR_RW / __ATTR_RO / __ATTR_WO helpers.
 */
#define XFS_SYSFS_ATTR_RW(name) \
	static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
#define XFS_SYSFS_ATTR_RO(name) \
	static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
#define XFS_SYSFS_ATTR_WO(name) \
	static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name)

/* Address of the embedded struct attribute, for use in attribute arrays. */
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
|
|
|
|
|
2015-10-12 05:18:45 +11:00
|
|
|
STATIC ssize_t
|
|
|
|
xfs_sysfs_object_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
struct attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
|
|
|
|
|
|
|
|
return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
xfs_sysfs_object_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
struct attribute *attr,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
|
|
|
|
|
|
|
|
return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct sysfs_ops xfs_sysfs_ops = {
|
|
|
|
.show = xfs_sysfs_object_show,
|
|
|
|
.store = xfs_sysfs_object_store,
|
|
|
|
};
|
|
|
|
|
2016-03-15 11:42:44 +11:00
|
|
|
static struct attribute *xfs_mp_attrs[] = {
|
|
|
|
NULL,
|
|
|
|
};
|
2022-01-03 11:10:18 -08:00
|
|
|
ATTRIBUTE_GROUPS(xfs_mp);
|
2016-03-15 11:42:44 +11:00
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
const struct kobj_type xfs_mp_ktype = {
|
2016-03-15 11:42:44 +11:00
|
|
|
.release = xfs_sysfs_release,
|
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2022-01-03 11:10:18 -08:00
|
|
|
.default_groups = xfs_mp_groups,
|
2016-03-15 11:42:44 +11:00
|
|
|
};
|
|
|
|
|
2014-09-09 11:52:42 +10:00
|
|
|
#ifdef DEBUG
|
|
|
|
/* debug */
|
|
|
|
|
2017-06-14 21:29:12 -07:00
|
|
|
STATIC ssize_t
|
|
|
|
bug_on_assert_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val == 1)
|
|
|
|
xfs_globals.bug_on_assert = true;
|
|
|
|
else if (val == 0)
|
|
|
|
xfs_globals.bug_on_assert = false;
|
|
|
|
else
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
bug_on_assert_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", xfs_globals.bug_on_assert);
|
2017-06-14 21:29:12 -07:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(bug_on_assert);
|
|
|
|
|
2014-09-09 11:56:13 +10:00
|
|
|
STATIC ssize_t
|
|
|
|
log_recovery_delay_store(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
2014-09-09 11:56:13 +10:00
|
|
|
const char *buf,
|
2015-10-12 05:18:45 +11:00
|
|
|
size_t count)
|
2014-09-09 11:56:13 +10:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val < 0 || val > 60)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
xfs_globals.log_recovery_delay = val;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
log_recovery_delay_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2014-09-09 11:56:13 +10:00
|
|
|
{
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", xfs_globals.log_recovery_delay);
|
2014-09-09 11:56:13 +10:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(log_recovery_delay);
|
|
|
|
|
2018-05-10 21:50:23 -07:00
|
|
|
STATIC ssize_t
|
|
|
|
mount_delay_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val < 0 || val > 60)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
xfs_globals.mount_delay = val;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
mount_delay_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", xfs_globals.mount_delay);
|
2018-05-10 21:50:23 -07:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(mount_delay);
|
|
|
|
|
2019-02-18 09:38:49 -08:00
|
|
|
static ssize_t
|
|
|
|
always_cow_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
ret = kstrtobool(buf, &xfs_globals.always_cow);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
always_cow_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", xfs_globals.always_cow);
|
2019-02-18 09:38:49 -08:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(always_cow);
|
|
|
|
|
2019-07-03 07:33:26 -07:00
|
|
|
/*
|
|
|
|
* Override how many threads the parallel work queue is allowed to create.
|
|
|
|
* This has to be a debug-only global (instead of an errortag) because one of
|
|
|
|
* the main users of parallel workqueues is mount time quotacheck.
|
|
|
|
*/
|
|
|
|
STATIC ssize_t
|
|
|
|
pwork_threads_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val < -1 || val > num_possible_cpus())
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
xfs_globals.pwork_threads = val;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
pwork_threads_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", xfs_globals.pwork_threads);
|
2019-07-03 07:33:26 -07:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(pwork_threads);
|
2022-05-11 17:01:22 +10:00
|
|
|
|
2023-12-04 12:13:26 -08:00
|
|
|
/*
|
|
|
|
* The "LARP" (Logged extended Attribute Recovery Persistence) debugging knob
|
|
|
|
* sets the XFS_DA_OP_LOGGED flag on all xfs_attr_set operations performed on
|
|
|
|
* V5 filesystems. As a result, the intermediate progress of all setxattr and
|
|
|
|
* removexattr operations are tracked via the log and can be restarted during
|
|
|
|
* recovery. This is useful for testing xattr recovery prior to merging of the
|
|
|
|
* parent pointer feature which requires it to maintain consistency, and may be
|
|
|
|
* enabled for userspace xattrs in the future.
|
|
|
|
*/
|
2022-05-11 17:01:22 +10:00
|
|
|
static ssize_t
|
|
|
|
larp_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
ret = kstrtobool(buf, &xfs_globals.larp);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
larp_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.larp);
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(larp);
|
2019-07-03 07:33:26 -07:00
|
|
|
|
2023-12-15 10:03:28 -08:00
|
|
|
STATIC ssize_t
|
|
|
|
bload_leaf_slack_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
xfs_globals.bload_leaf_slack = val;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
bload_leaf_slack_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_leaf_slack);
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(bload_leaf_slack);
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
bload_node_slack_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
xfs_globals.bload_node_slack = val;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
bload_node_slack_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_node_slack);
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(bload_node_slack);
|
|
|
|
|
2014-09-09 11:52:42 +10:00
|
|
|
static struct attribute *xfs_dbg_attrs[] = {
|
2017-06-14 21:29:12 -07:00
|
|
|
ATTR_LIST(bug_on_assert),
|
2014-09-09 11:56:13 +10:00
|
|
|
ATTR_LIST(log_recovery_delay),
|
2018-05-10 21:50:23 -07:00
|
|
|
ATTR_LIST(mount_delay),
|
2019-02-18 09:38:49 -08:00
|
|
|
ATTR_LIST(always_cow),
|
2019-07-03 07:33:26 -07:00
|
|
|
ATTR_LIST(pwork_threads),
|
2022-05-11 17:01:22 +10:00
|
|
|
ATTR_LIST(larp),
|
2023-12-15 10:03:28 -08:00
|
|
|
ATTR_LIST(bload_leaf_slack),
|
|
|
|
ATTR_LIST(bload_node_slack),
|
2014-09-09 11:52:42 +10:00
|
|
|
NULL,
|
|
|
|
};
|
2022-01-03 11:10:18 -08:00
|
|
|
ATTRIBUTE_GROUPS(xfs_dbg);
|
2014-09-09 11:52:42 +10:00
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
const struct kobj_type xfs_dbg_ktype = {
|
2014-09-09 11:52:42 +10:00
|
|
|
.release = xfs_sysfs_release,
|
2015-10-12 05:18:45 +11:00
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2022-01-03 11:10:18 -08:00
|
|
|
.default_groups = xfs_dbg_groups,
|
2014-09-09 11:52:42 +10:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif /* DEBUG */
|
|
|
|
|
2015-10-12 05:15:45 +11:00
|
|
|
/* stats */
|
|
|
|
|
2015-10-12 05:19:45 +11:00
|
|
|
static inline struct xstats *
|
|
|
|
to_xstats(struct kobject *kobject)
|
|
|
|
{
|
|
|
|
struct xfs_kobj *kobj = to_kobj(kobject);
|
|
|
|
|
|
|
|
return container_of(kobj, struct xstats, xs_kobj);
|
|
|
|
}
|
|
|
|
|
2015-10-12 05:15:45 +11:00
|
|
|
STATIC ssize_t
|
|
|
|
stats_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2015-10-12 05:15:45 +11:00
|
|
|
{
|
2015-10-12 05:19:45 +11:00
|
|
|
struct xstats *stats = to_xstats(kobject);
|
|
|
|
|
|
|
|
return xfs_stats_format(stats->xs_stats, buf);
|
2015-10-12 05:15:45 +11:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RO(stats);
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
stats_clear_store(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
2015-10-12 05:15:45 +11:00
|
|
|
const char *buf,
|
2015-10-12 05:18:45 +11:00
|
|
|
size_t count)
|
2015-10-12 05:15:45 +11:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int val;
|
2015-10-12 05:19:45 +11:00
|
|
|
struct xstats *stats = to_xstats(kobject);
|
2015-10-12 05:15:45 +11:00
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val != 1)
|
|
|
|
return -EINVAL;
|
2015-10-12 05:19:45 +11:00
|
|
|
|
|
|
|
xfs_stats_clearall(stats->xs_stats);
|
2015-10-12 05:15:45 +11:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_WO(stats_clear);
|
|
|
|
|
|
|
|
static struct attribute *xfs_stats_attrs[] = {
|
|
|
|
ATTR_LIST(stats),
|
|
|
|
ATTR_LIST(stats_clear),
|
|
|
|
NULL,
|
|
|
|
};
|
2022-01-03 11:10:18 -08:00
|
|
|
ATTRIBUTE_GROUPS(xfs_stats);
|
2015-10-12 05:15:45 +11:00
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
const struct kobj_type xfs_stats_ktype = {
|
2015-10-12 05:15:45 +11:00
|
|
|
.release = xfs_sysfs_release,
|
2015-10-12 05:18:45 +11:00
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2022-01-03 11:10:18 -08:00
|
|
|
.default_groups = xfs_stats_groups,
|
2015-10-12 05:15:45 +11:00
|
|
|
};
|
|
|
|
|
2014-07-15 08:07:29 +10:00
|
|
|
/* xlog */
|
|
|
|
|
2015-10-12 05:18:45 +11:00
|
|
|
static inline struct xlog *
|
|
|
|
to_xlog(struct kobject *kobject)
|
|
|
|
{
|
|
|
|
struct xfs_kobj *kobj = to_kobj(kobject);
|
|
|
|
|
|
|
|
return container_of(kobj, struct xlog, l_kobj);
|
|
|
|
}
|
|
|
|
|
2014-07-15 08:07:48 +10:00
|
|
|
STATIC ssize_t
|
|
|
|
log_head_lsn_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2014-07-15 08:07:48 +10:00
|
|
|
{
|
|
|
|
int cycle;
|
|
|
|
int block;
|
2015-10-12 05:18:45 +11:00
|
|
|
struct xlog *log = to_xlog(kobject);
|
2014-07-15 08:07:48 +10:00
|
|
|
|
|
|
|
spin_lock(&log->l_icloglock);
|
|
|
|
cycle = log->l_curr_cycle;
|
|
|
|
block = log->l_curr_block;
|
|
|
|
spin_unlock(&log->l_icloglock);
|
|
|
|
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d:%d\n", cycle, block);
|
2014-07-15 08:07:48 +10:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RO(log_head_lsn);
|
|
|
|
|
|
|
|
STATIC ssize_t
|
|
|
|
log_tail_lsn_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2014-07-15 08:07:48 +10:00
|
|
|
{
|
|
|
|
int cycle;
|
|
|
|
int block;
|
2015-10-12 05:18:45 +11:00
|
|
|
struct xlog *log = to_xlog(kobject);
|
2014-07-15 08:07:48 +10:00
|
|
|
|
|
|
|
xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d:%d\n", cycle, block);
|
2014-07-15 08:07:48 +10:00
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RO(log_tail_lsn);
|
|
|
|
|
|
|
|
STATIC ssize_t
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normal used by LSNs.
This is annoyingly complex because we have to split, crack and
combined these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simply byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
reserve_grant_head_bytes_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2014-07-15 08:07:48 +10:00
|
|
|
{
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normal used by LSNs.
This is annoyingly complex because we have to split, crack and
combined these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simply byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
return sysfs_emit(buf, "%lld\n",
|
|
|
|
atomic64_read(&to_xlog(kobject)->l_reserve_head.grant));
|
2014-07-15 08:07:48 +10:00
|
|
|
}
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
XFS_SYSFS_ATTR_RO(reserve_grant_head_bytes);
|
2014-07-15 08:07:48 +10:00
|
|
|
|
|
|
|
STATIC ssize_t
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normal used by LSNs.
This is annoyingly complex because we have to split, crack and
combined these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simply byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
write_grant_head_bytes_show(
|
2015-10-12 05:18:45 +11:00
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
2014-07-15 08:07:48 +10:00
|
|
|
{
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normal used by LSNs.
This is annoyingly complex because we have to split, crack and
combined these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simply byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
return sysfs_emit(buf, "%lld\n",
|
|
|
|
atomic64_read(&to_xlog(kobject)->l_write_head.grant));
|
2014-07-15 08:07:48 +10:00
|
|
|
}
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normally used by LSNs.
This is annoyingly complex because we have to split, crack and
combine these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simple byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
/* sysfs attribute for write_grant_head_bytes; listed in xfs_log_attrs[] via ATTR_LIST(). */
XFS_SYSFS_ATTR_RO(write_grant_head_bytes);
|
2014-07-15 08:07:48 +10:00
|
|
|
|
2014-07-15 08:07:29 +10:00
|
|
|
static struct attribute *xfs_log_attrs[] = {
|
2014-07-15 08:07:48 +10:00
|
|
|
ATTR_LIST(log_head_lsn),
|
|
|
|
ATTR_LIST(log_tail_lsn),
|
xfs: grant heads track byte counts, not LSNs
The grant heads in the log track the space reserved in the log for
running transactions. They do this by tracking how far ahead of the
tail that the reservation has reached, and the units for doing this
are {cycle,bytes} for the reserve head rather than {cycle,blocks}
which are normal used by LSNs.
This is annoyingly complex because we have to split, crack and
combined these tuples for any calculation we do to determine log
space and targets. This is computationally expensive as well as
difficult to do atomically and locklessly, as well as limiting the
size of the log to 2^32 bytes.
Really, though, all the grant heads are tracking is how much space
is currently available for use in the log. We can track this as a
simply byte count - we just don't care what the actual physical
location in the log the head and tail are at, just how much space we
have remaining before the head and tail overlap.
So, convert the grant heads to track the byte reservations that are
active rather than the current (cycle, offset) tuples. This means an
empty log has zero bytes consumed, and a full log is when the
reservations reach the size of the log minus the space consumed by
the AIL.
This greatly simplifies the accounting and checks for whether there
is space available. We no longer need to crack or combine LSNs to
determine how much space the log has left, nor do we need to look at
the head or tail of the log to determine how close to full we are.
There is, however, a complexity that needs to be handled. We know
how much space is being tracked in the AIL now via log->l_tail_space
and the log tickets track active reservations and return the unused
portions to the grant heads when ungranted. Unfortunately, we don't
track the used portion of the grant, so when we transfer log items
from the CIL to the AIL, the space accounted to the grant heads is
transferred to the log tail space. Hence when we move the AIL head
forwards on item insert, we have to remove that space from the grant
heads.
We also remove the xlog_verify_grant_tail() debug function as it is
no longer useful. The check it performs has been racy since delayed
logging was introduced, but now it is clearly only detecting false
positives so remove it.
The result of this substantially simpler accounting algorithm is an
increase in sustained transaction rate from ~1.3 million
transactions/s to ~1.9 million transactions/s with no increase in
CPU usage. We also remove the 32 bit space limitation on the grant
heads, which will allow us to increase the journal size beyond 2GB
in future.
Note that this renames the sysfs files exposing the log grant space
now that the values are exported in bytes. This allows xfstests
to auto-detect the old or new ABI.
[hch: move xlog_grant_sub_space out of line,
update the xlog_grant_{add,sub}_space prototypes,
rename the sysfs files to allow auto-detection in xfstests]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
2024-06-20 09:21:27 +02:00
|
|
|
ATTR_LIST(reserve_grant_head_bytes),
|
|
|
|
ATTR_LIST(write_grant_head_bytes),
|
2014-07-15 08:07:29 +10:00
|
|
|
NULL,
|
|
|
|
};
|
2022-01-03 11:10:18 -08:00
|
|
|
ATTRIBUTE_GROUPS(xfs_log);
|
2014-07-15 08:07:29 +10:00
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
const struct kobj_type xfs_log_ktype = {
|
2014-07-15 08:07:29 +10:00
|
|
|
.release = xfs_sysfs_release,
|
2015-10-12 05:18:45 +11:00
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2022-01-03 11:10:18 -08:00
|
|
|
.default_groups = xfs_log_groups,
|
2014-07-15 08:07:29 +10:00
|
|
|
};
|
2016-05-18 10:58:51 +10:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Metadata IO error configuration
|
|
|
|
*
|
|
|
|
* The sysfs structure here is:
|
|
|
|
* ...xfs/<dev>/error/<class>/<errno>/<error_attrs>
|
|
|
|
*
|
|
|
|
* where <class> allows us to discriminate between data IO and metadata IO,
|
|
|
|
* and any other future type of IO (e.g. special inode or directory error
|
|
|
|
* handling) we care to support.
|
|
|
|
*/
|
|
|
|
static inline struct xfs_error_cfg *
|
|
|
|
to_error_cfg(struct kobject *kobject)
|
|
|
|
{
|
|
|
|
struct xfs_kobj *kobj = to_kobj(kobject);
|
|
|
|
return container_of(kobj, struct xfs_error_cfg, kobj);
|
|
|
|
}
|
|
|
|
|
2016-05-18 11:11:27 +10:00
|
|
|
static inline struct xfs_mount *
|
|
|
|
err_to_mp(struct kobject *kobject)
|
|
|
|
{
|
|
|
|
struct xfs_kobj *kobj = to_kobj(kobject);
|
|
|
|
return container_of(kobj, struct xfs_mount, m_error_kobj);
|
|
|
|
}
|
|
|
|
|
2016-05-18 11:08:15 +10:00
|
|
|
static ssize_t
|
|
|
|
max_retries_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2016-09-14 07:51:30 +10:00
|
|
|
int retries;
|
2016-05-18 11:08:15 +10:00
|
|
|
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
|
|
|
|
|
2017-01-03 20:34:17 -08:00
|
|
|
if (cfg->max_retries == XFS_ERR_RETRY_FOREVER)
|
2016-09-14 07:51:30 +10:00
|
|
|
retries = -1;
|
|
|
|
else
|
|
|
|
retries = cfg->max_retries;
|
|
|
|
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", retries);
|
2016-05-18 11:08:15 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
max_retries_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val < -1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-09-14 07:51:30 +10:00
|
|
|
if (val == -1)
|
2017-01-03 20:34:17 -08:00
|
|
|
cfg->max_retries = XFS_ERR_RETRY_FOREVER;
|
2016-09-14 07:51:30 +10:00
|
|
|
else
|
|
|
|
cfg->max_retries = val;
|
2016-05-18 11:08:15 +10:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(max_retries);
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
retry_timeout_seconds_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
2016-09-14 07:51:30 +10:00
|
|
|
int timeout;
|
2016-05-18 11:08:15 +10:00
|
|
|
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
|
|
|
|
|
2016-09-14 07:51:30 +10:00
|
|
|
if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER)
|
|
|
|
timeout = -1;
|
|
|
|
else
|
|
|
|
timeout = jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC;
|
|
|
|
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", timeout);
|
2016-05-18 11:08:15 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
retry_timeout_seconds_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-09-14 07:51:30 +10:00
|
|
|
/* 1 day timeout maximum, -1 means infinite */
|
|
|
|
if (val < -1 || val > 86400)
|
2016-05-18 11:08:15 +10:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-09-14 07:51:30 +10:00
|
|
|
if (val == -1)
|
|
|
|
cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
|
|
|
|
else {
|
|
|
|
cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC);
|
|
|
|
ASSERT(msecs_to_jiffies(val * MSEC_PER_SEC) < LONG_MAX);
|
|
|
|
}
|
2016-05-18 11:08:15 +10:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
|
|
|
|
|
2016-05-18 11:11:27 +10:00
|
|
|
static ssize_t
|
|
|
|
fail_at_unmount_show(
|
|
|
|
struct kobject *kobject,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct xfs_mount *mp = err_to_mp(kobject);
|
|
|
|
|
2021-10-14 09:37:03 -07:00
|
|
|
return sysfs_emit(buf, "%d\n", mp->m_fail_unmount);
|
2016-05-18 11:11:27 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
fail_at_unmount_store(
|
|
|
|
struct kobject *kobject,
|
|
|
|
const char *buf,
|
|
|
|
size_t count)
|
|
|
|
{
|
|
|
|
struct xfs_mount *mp = err_to_mp(kobject);
|
|
|
|
int ret;
|
|
|
|
int val;
|
|
|
|
|
|
|
|
ret = kstrtoint(buf, 0, &val);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (val < 0 || val > 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
mp->m_fail_unmount = val;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
XFS_SYSFS_ATTR_RW(fail_at_unmount);
|
|
|
|
|
2016-05-18 11:08:15 +10:00
|
|
|
static struct attribute *xfs_error_attrs[] = {
|
|
|
|
ATTR_LIST(max_retries),
|
|
|
|
ATTR_LIST(retry_timeout_seconds),
|
|
|
|
NULL,
|
|
|
|
};
|
2022-01-03 11:10:18 -08:00
|
|
|
ATTRIBUTE_GROUPS(xfs_error);
|
2016-05-18 11:08:15 +10:00
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
static const struct kobj_type xfs_error_cfg_ktype = {
|
2016-05-18 10:58:51 +10:00
|
|
|
.release = xfs_sysfs_release,
|
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2022-01-03 11:10:18 -08:00
|
|
|
.default_groups = xfs_error_groups,
|
2016-05-18 10:58:51 +10:00
|
|
|
};
|
|
|
|
|
2023-02-09 18:56:48 -08:00
|
|
|
static const struct kobj_type xfs_error_ktype = {
|
2016-05-18 10:58:51 +10:00
|
|
|
.release = xfs_sysfs_release,
|
2016-05-18 11:11:27 +10:00
|
|
|
.sysfs_ops = &xfs_sysfs_ops,
|
2016-05-18 10:58:51 +10:00
|
|
|
};
|
|
|
|
|
2016-05-18 11:06:44 +10:00
|
|
|
/*
|
|
|
|
* Error initialization tables. These need to be ordered in the same
|
|
|
|
* order as the enums used to index the array. All class init tables need to
|
|
|
|
* define a "default" behaviour as the first entry, all other entries can be
|
|
|
|
* empty.
|
|
|
|
*/
|
|
|
|
/*
 * Initial settings for one error configuration entry.
 *
 * @name is only ever pointed at string literals and is passed on as the
 * sysfs directory name, so it is const-qualified.
 */
struct xfs_error_init {
	const char	*name;		/* sysfs directory name */
	int		max_retries;	/* retry count, or XFS_ERR_RETRY_FOREVER */
	int		retry_timeout;	/* in seconds */
};
|
|
|
|
|
|
|
|
static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
|
|
|
|
{ .name = "default",
|
2016-05-18 11:09:28 +10:00
|
|
|
.max_retries = XFS_ERR_RETRY_FOREVER,
|
2016-09-14 07:51:30 +10:00
|
|
|
.retry_timeout = XFS_ERR_RETRY_FOREVER,
|
2016-05-18 11:06:44 +10:00
|
|
|
},
|
2016-05-18 11:09:28 +10:00
|
|
|
{ .name = "EIO",
|
|
|
|
.max_retries = XFS_ERR_RETRY_FOREVER,
|
2016-09-14 07:51:30 +10:00
|
|
|
.retry_timeout = XFS_ERR_RETRY_FOREVER,
|
2016-05-18 11:09:28 +10:00
|
|
|
},
|
|
|
|
{ .name = "ENOSPC",
|
|
|
|
.max_retries = XFS_ERR_RETRY_FOREVER,
|
2016-09-14 07:51:30 +10:00
|
|
|
.retry_timeout = XFS_ERR_RETRY_FOREVER,
|
2016-05-18 11:09:28 +10:00
|
|
|
},
|
|
|
|
{ .name = "ENODEV",
|
2016-09-14 07:51:30 +10:00
|
|
|
.max_retries = 0, /* We can't recover from devices disappearing */
|
|
|
|
.retry_timeout = 0,
|
2016-05-18 11:09:28 +10:00
|
|
|
},
|
2016-05-18 11:06:44 +10:00
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
xfs_error_sysfs_init_class(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
int class,
|
|
|
|
const char *parent_name,
|
|
|
|
struct xfs_kobj *parent_kobj,
|
|
|
|
const struct xfs_error_init init[])
|
|
|
|
{
|
|
|
|
struct xfs_error_cfg *cfg;
|
|
|
|
int error;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ASSERT(class < XFS_ERR_CLASS_MAX);
|
|
|
|
|
|
|
|
error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype,
|
|
|
|
&mp->m_error_kobj, parent_name);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) {
|
|
|
|
cfg = &mp->m_error_cfg[class][i];
|
|
|
|
error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype,
|
|
|
|
parent_kobj, init[i].name);
|
|
|
|
if (error)
|
|
|
|
goto out_error;
|
|
|
|
|
|
|
|
cfg->max_retries = init[i].max_retries;
|
2016-09-14 07:51:30 +10:00
|
|
|
if (init[i].retry_timeout == XFS_ERR_RETRY_FOREVER)
|
|
|
|
cfg->retry_timeout = XFS_ERR_RETRY_FOREVER;
|
|
|
|
else
|
|
|
|
cfg->retry_timeout = msecs_to_jiffies(
|
2016-05-18 11:08:15 +10:00
|
|
|
init[i].retry_timeout * MSEC_PER_SEC);
|
2016-05-18 11:06:44 +10:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_error:
|
|
|
|
/* unwind the entries that succeeded */
|
|
|
|
for (i--; i >= 0; i--) {
|
|
|
|
cfg = &mp->m_error_cfg[class][i];
|
|
|
|
xfs_sysfs_del(&cfg->kobj);
|
|
|
|
}
|
|
|
|
xfs_sysfs_del(parent_kobj);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2016-05-18 10:58:51 +10:00
|
|
|
int
|
|
|
|
xfs_error_sysfs_init(
|
|
|
|
struct xfs_mount *mp)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
/* .../xfs/<dev>/error/ */
|
|
|
|
error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype,
|
|
|
|
&mp->m_kobj, "error");
|
2016-05-18 11:01:00 +10:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2016-05-18 11:11:27 +10:00
|
|
|
error = sysfs_create_file(&mp->m_error_kobj.kobject,
|
|
|
|
ATTR_LIST(fail_at_unmount));
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
goto out_error;
|
|
|
|
|
2016-05-18 11:01:00 +10:00
|
|
|
/* .../xfs/<dev>/error/metadata/ */
|
2016-05-18 11:06:44 +10:00
|
|
|
error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
|
|
|
|
"metadata", &mp->m_error_meta_kobj,
|
|
|
|
xfs_error_meta_init);
|
2016-05-18 11:01:00 +10:00
|
|
|
if (error)
|
|
|
|
goto out_error;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_error:
|
|
|
|
xfs_sysfs_del(&mp->m_error_kobj);
|
2016-05-18 10:58:51 +10:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
xfs_error_sysfs_del(
|
|
|
|
struct xfs_mount *mp)
|
|
|
|
{
|
2016-05-18 11:01:00 +10:00
|
|
|
struct xfs_error_cfg *cfg;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < XFS_ERR_CLASS_MAX; i++) {
|
|
|
|
for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) {
|
|
|
|
cfg = &mp->m_error_cfg[i][j];
|
|
|
|
|
|
|
|
xfs_sysfs_del(&cfg->kobj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
xfs_sysfs_del(&mp->m_error_meta_kobj);
|
2016-05-18 10:58:51 +10:00
|
|
|
xfs_sysfs_del(&mp->m_error_kobj);
|
|
|
|
}
|
2016-05-18 11:05:33 +10:00
|
|
|
|
|
|
|
struct xfs_error_cfg *
|
|
|
|
xfs_error_get_cfg(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
int error_class,
|
|
|
|
int error)
|
|
|
|
{
|
|
|
|
struct xfs_error_cfg *cfg;
|
|
|
|
|
2016-07-20 10:48:51 +10:00
|
|
|
if (error < 0)
|
|
|
|
error = -error;
|
|
|
|
|
2016-05-18 11:05:33 +10:00
|
|
|
switch (error) {
|
2016-05-18 11:09:28 +10:00
|
|
|
case EIO:
|
|
|
|
cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];
|
|
|
|
break;
|
|
|
|
case ENOSPC:
|
|
|
|
cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC];
|
|
|
|
break;
|
|
|
|
case ENODEV:
|
|
|
|
cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV];
|
|
|
|
break;
|
2016-05-18 11:05:33 +10:00
|
|
|
default:
|
|
|
|
cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return cfg;
|
|
|
|
}
|