mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
acc8f8628c
Ever since 6.12-rc1, I've observed a pile of warnings from the kernel when running fstests with quotas enabled: WARNING: CPU: 1 PID: 458580 at mm/page_alloc.c:4221 __alloc_pages_noprof+0xc9c/0xf18 CPU: 1 UID: 0 PID: 458580 Comm: xfsaild/sda3 Tainted: G W 6.12.0-rc6-djwa #rc6 6ee3e0e531f6457e2d26aa008a3b65ff184b377c <snip> Call trace: __alloc_pages_noprof+0xc9c/0xf18 alloc_pages_mpol_noprof+0x94/0x240 alloc_pages_noprof+0x68/0xf8 new_slab+0x3e0/0x568 ___slab_alloc+0x5a0/0xb88 __slab_alloc.constprop.0+0x7c/0xf8 __kmalloc_noprof+0x404/0x4d0 xfs_buf_get_map+0x594/0xde0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_buf_read_map+0x64/0x2e0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_trans_read_buf_map+0x1dc/0x518 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dqflush+0xac/0x468 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dquot_logitem_push+0xe4/0x148 [xfs 384cb02810558b4c490343c164e9407332118f88] xfsaild+0x3f4/0xde8 [xfs 384cb02810558b4c490343c164e9407332118f88] kthread+0x110/0x128 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- This corresponds to the line: WARN_ON_ONCE(current->flags & PF_MEMALLOC); within the NOFAIL checks. What's happening here is that the XFS AIL is trying to write a disk quota update back into the filesystem, but for that it needs to read the ondisk buffer for the dquot. The buffer is not in memory anymore, probably because it was evicted. Regardless, the buffer cache tries to allocate a new buffer, but those allocations are NOFAIL. The AIL thread has marked itself PF_MEMALLOC (aka noreclaim) since commit 43ff2122e6
("xfs: on-stack delayed write buffer lists") presumably because reclaim can push on XFS to push on the AIL. An easy way to fix this probably would have been to drop the NOFAIL flag from the xfs_buf allocation and open code a retry loop, but then there's still the problem that for bs>ps filesystems, the buffer itself could require up to 64k worth of pages. Inode items had similar behavior (multi-page cluster buffers that we don't want to allocate in the AIL) which we solved by making transaction precommit attach the inode cluster buffers to the dirty log item. Let's solve the dquot problem in the same way. So: Make a real precommit handler to read the dquot buffer and attach it to the log item; pass it to dqflush in the push method; and have the iodone function detach the buffer once we've flushed everything. Add a state flag to the log item to track when a thread has entered the precommit -> push mechanism to skip the detaching if it turns out that the dquot is very busy, as we don't hold the dquot lock between log item commit and AIL push. Reading and attaching the dquot buffer in the precommit hook is inspired by the work done for inode cluster buffers some time ago. Cc: <stable@vger.kernel.org> # v6.12 Fixes: 903edea6c5
("mm: warn about illegal __GFP_NOFAIL usage in a more appropriate location and manner") Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
275 lines
6.4 KiB
C
275 lines
6.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2003 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_buf_item.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_qm.h"
|
|
#include "xfs_log.h"
|
|
#include "xfs_error.h"
|
|
|
|
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
|
|
{
|
|
return container_of(lip, struct xfs_dq_logitem, qli_item);
|
|
}
|
|
|
|
/*
|
|
* returns the number of iovecs needed to log the given dquot item.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_size(
|
|
struct xfs_log_item *lip,
|
|
int *nvecs,
|
|
int *nbytes)
|
|
{
|
|
*nvecs += 2;
|
|
*nbytes += sizeof(struct xfs_dq_logformat) +
|
|
sizeof(struct xfs_disk_dquot);
|
|
}
|
|
|
|
/*
|
|
* fills in the vector of log iovecs for the given dquot log item.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_format(
|
|
struct xfs_log_item *lip,
|
|
struct xfs_log_vec *lv)
|
|
{
|
|
struct xfs_disk_dquot ddq;
|
|
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
|
|
struct xfs_log_iovec *vecp = NULL;
|
|
struct xfs_dq_logformat *qlf;
|
|
|
|
qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT);
|
|
qlf->qlf_type = XFS_LI_DQUOT;
|
|
qlf->qlf_size = 2;
|
|
qlf->qlf_id = qlip->qli_dquot->q_id;
|
|
qlf->qlf_blkno = qlip->qli_dquot->q_blkno;
|
|
qlf->qlf_len = 1;
|
|
qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset;
|
|
xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat));
|
|
|
|
xfs_dquot_to_disk(&ddq, qlip->qli_dquot);
|
|
|
|
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &ddq,
|
|
sizeof(struct xfs_disk_dquot));
|
|
}
|
|
|
|
/*
|
|
* Increment the pin count of the given dquot.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_pin(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
atomic_inc(&dqp->q_pincount);
|
|
}
|
|
|
|
/*
|
|
* Decrement the pin count of the given dquot, and wake up
|
|
* anyone in xfs_dqwait_unpin() if the count goes to 0. The
|
|
* dquot must have been previously pinned with a call to
|
|
* xfs_qm_dquot_logitem_pin().
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_unpin(
|
|
struct xfs_log_item *lip,
|
|
int remove)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(atomic_read(&dqp->q_pincount) > 0);
|
|
if (atomic_dec_and_test(&dqp->q_pincount))
|
|
wake_up(&dqp->q_pinwait);
|
|
}
|
|
|
|
/*
|
|
* This is called to wait for the given dquot to be unpinned.
|
|
* Most of these pin/unpin routines are plagiarized from inode code.
|
|
*/
|
|
void
|
|
xfs_qm_dqunpin_wait(
|
|
struct xfs_dquot *dqp)
|
|
{
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
if (atomic_read(&dqp->q_pincount) == 0)
|
|
return;
|
|
|
|
/*
|
|
* Give the log a push so we don't wait here too long.
|
|
*/
|
|
xfs_log_force(dqp->q_mount, 0);
|
|
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
|
|
}
|
|
|
|
STATIC uint
|
|
xfs_qm_dquot_logitem_push(
|
|
struct xfs_log_item *lip,
|
|
struct list_head *buffer_list)
|
|
__releases(&lip->li_ailp->ail_lock)
|
|
__acquires(&lip->li_ailp->ail_lock)
|
|
{
|
|
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
|
|
struct xfs_dquot *dqp = qlip->qli_dquot;
|
|
struct xfs_buf *bp;
|
|
uint rval = XFS_ITEM_SUCCESS;
|
|
int error;
|
|
|
|
if (atomic_read(&dqp->q_pincount) > 0)
|
|
return XFS_ITEM_PINNED;
|
|
|
|
if (!xfs_dqlock_nowait(dqp))
|
|
return XFS_ITEM_LOCKED;
|
|
|
|
/*
|
|
* Re-check the pincount now that we stabilized the value by
|
|
* taking the quota lock.
|
|
*/
|
|
if (atomic_read(&dqp->q_pincount) > 0) {
|
|
rval = XFS_ITEM_PINNED;
|
|
goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
* Someone else is already flushing the dquot. Nothing we can do
|
|
* here but wait for the flush to finish and remove the item from
|
|
* the AIL.
|
|
*/
|
|
if (!xfs_dqflock_nowait(dqp)) {
|
|
rval = XFS_ITEM_FLUSHING;
|
|
goto out_unlock;
|
|
}
|
|
|
|
spin_unlock(&lip->li_ailp->ail_lock);
|
|
|
|
error = xfs_dquot_use_attached_buf(dqp, &bp);
|
|
if (error == -EAGAIN) {
|
|
xfs_dqfunlock(dqp);
|
|
rval = XFS_ITEM_LOCKED;
|
|
goto out_relock_ail;
|
|
}
|
|
|
|
/*
|
|
* dqflush completes dqflock on error, and the delwri ioend does it on
|
|
* success.
|
|
*/
|
|
error = xfs_qm_dqflush(dqp, bp);
|
|
if (!error) {
|
|
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
|
rval = XFS_ITEM_FLUSHING;
|
|
}
|
|
xfs_buf_relse(bp);
|
|
|
|
out_relock_ail:
|
|
spin_lock(&lip->li_ailp->ail_lock);
|
|
out_unlock:
|
|
xfs_dqunlock(dqp);
|
|
return rval;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_release(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
|
|
/*
|
|
* dquots are never 'held' from getting unlocked at the end of
|
|
* a transaction. Their locking and unlocking is hidden inside the
|
|
* transaction layer, within trans_commit. Hence, no LI_HOLD flag
|
|
* for the logitem.
|
|
*/
|
|
xfs_dqunlock(dqp);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_committing(
|
|
struct xfs_log_item *lip,
|
|
xfs_csn_t seq)
|
|
{
|
|
return xfs_qm_dquot_logitem_release(lip);
|
|
}
|
|
|
|
#ifdef DEBUG_EXPENSIVE
/*
 * Debug-only sanity check run before a dquot is committed to the log.  The
 * incore dquot is converted to its on-disk form and passed through
 * xfs_dquot_verify(); on failure the corruption is reported, the filesystem
 * is shut down and an assertion fires.
 */
static void
xfs_qm_dquot_logitem_precommit_check(
	struct xfs_dquot	*dqp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_disk_dquot	ddq = { };
	xfs_failaddr_t		fa;

	xfs_dquot_to_disk(&ddq, dqp);

	fa = xfs_dquot_verify(mp, &ddq, dqp->q_id);
	if (!fa)
		return;

	XFS_CORRUPTION_ERROR("Bad dquot during logging",
			XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq));
	xfs_alert(mp,
		"Metadata corruption detected at %pS, dquot 0x%x",
		fa, dqp->q_id);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	ASSERT(fa == NULL);
}
#else
/* Without DEBUG_EXPENSIVE the check compiles away to nothing. */
# define xfs_qm_dquot_logitem_precommit_check(...)	((void)0)
#endif
|
|
|
|
static int
|
|
xfs_qm_dquot_logitem_precommit(
|
|
struct xfs_trans *tp,
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
|
|
struct xfs_dquot *dqp = qlip->qli_dquot;
|
|
|
|
xfs_qm_dquot_logitem_precommit_check(dqp);
|
|
|
|
return xfs_dquot_attach_buf(tp, dqp);
|
|
}
|
|
|
|
static const struct xfs_item_ops xfs_dquot_item_ops = {
|
|
.iop_size = xfs_qm_dquot_logitem_size,
|
|
.iop_precommit = xfs_qm_dquot_logitem_precommit,
|
|
.iop_format = xfs_qm_dquot_logitem_format,
|
|
.iop_pin = xfs_qm_dquot_logitem_pin,
|
|
.iop_unpin = xfs_qm_dquot_logitem_unpin,
|
|
.iop_release = xfs_qm_dquot_logitem_release,
|
|
.iop_committing = xfs_qm_dquot_logitem_committing,
|
|
.iop_push = xfs_qm_dquot_logitem_push,
|
|
};
|
|
|
|
/*
|
|
* Initialize the dquot log item for a newly allocated dquot.
|
|
* The dquot isn't locked at this point, but it isn't on any of the lists
|
|
* either, so we don't care.
|
|
*/
|
|
void
|
|
xfs_qm_dquot_logitem_init(
|
|
struct xfs_dquot *dqp)
|
|
{
|
|
struct xfs_dq_logitem *lp = &dqp->q_logitem;
|
|
|
|
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
|
|
&xfs_dquot_item_ops);
|
|
spin_lock_init(&lp->qli_lock);
|
|
lp->qli_dquot = dqp;
|
|
lp->qli_dirty = false;
|
|
}
|