vfs-6.13-rc7.fixes.2

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ4EhtAAKCRCRxhvAZXjc
 orToAQCIKKS7fk9j8CUSAdRG5mMy7Q++8OEVA+gyyMWuXnBPYwD/ehy+1xBVjCcI
 FBzLadaJSuygjZVCzhVXsE0oRf4A2wg=
 =waDA
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.13-rc7.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "afs:

   - Fix the maximum cell name length

   - Fix merge preference rule failure condition

  fuse:

   - Fix fuse_get_user_pages() so it doesn't risk misleading the caller
     into thinking pages have been allocated when they actually haven't

   - Fix direct-io folio offset and length calculation

  netfs:

   - Fix async direct-io handling

   - Fix read-retry for filesystems that don't provide a
     ->prepare_read() method

  vfs:

   - Prevent truncating 64-bit offsets to 32 bits in iomap

   - Fix memory barrier interactions when polling

   - Remove MNT_ONRB: concurrent modification of @mnt->mnt_flags could
     cause MNT_ONRB not to be set, leading to invalid access to a list
     member"

* tag 'vfs-6.13-rc7.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  poll: kill poll_does_not_wait()
  sock_poll_wait: kill the no longer necessary barrier after poll_wait()
  io_uring_poll: kill the no longer necessary barrier after poll_wait()
  poll_wait: kill the obsolete wait_address check
  poll_wait: add mb() to fix theoretical race between waitqueue_active() and .poll()
  afs: Fix merge preference rule failure condition
  netfs: Fix read-retry for fs with no ->prepare_read()
  netfs: Fix kernel async DIO
  fs: kill MNT_ONRB
  iomap: avoid truncating 64-bit offset to 32 bits
  afs: Fix the maximum cell name length
  fuse: Set *nbytesp=0 in fuse_get_user_pages on allocation failure
  fuse: fix direct io folio offset and length calculation
commit 7110f24f9e
Linus Torvalds, 2025-01-10 09:11:11 -08:00
15 changed files with 80 additions and 66 deletions

diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c

@@ -413,8 +413,10 @@ int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size)
 	do {
 		argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv));
-		if (argc < 0)
-			return argc;
+		if (argc < 0) {
+			ret = argc;
+			goto done;
+		}
 		if (argc < 2)
 			goto inval;

diff --git a/fs/afs/afs.h b/fs/afs/afs.h

@@ -10,7 +10,7 @@
 #include <linux/in.h>
 
-#define AFS_MAXCELLNAME	256	/* Maximum length of a cell name */
+#define AFS_MAXCELLNAME	253	/* Maximum length of a cell name (DNS limited) */
 #define AFS_MAXVOLNAME	64	/* Maximum length of a volume name */
 #define AFS_MAXNSERVERS	8	/* Maximum servers in a basic volume record */
 #define AFS_NMAXNSERVERS	13	/* Maximum servers in a N/U-class volume record */
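
The 253 here is the DNS bound: a full presentation-format domain name is
limited to 253 characters (255 octets on the wire, counting the length
bytes). A minimal userspace sketch of the resulting range check follows;
cell_name_is_valid() is a hypothetical helper for illustration, not kernel
API:

    #include <stdbool.h>
    #include <string.h>

    #define AFS_MAXCELLNAME 253 /* DNS limit on a full domain name */

    /* Hypothetical helper mirroring the bound the series enforces before
     * cell lookup: reject empty names and names longer than DNS allows. */
    static bool cell_name_is_valid(const char *name)
    {
        size_t len = name ? strlen(name) : 0;

        return len > 0 && len <= AFS_MAXCELLNAME;
    }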

diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h

@@ -13,6 +13,7 @@
 #define AFS_VL_PORT		7003	/* volume location service port */
 #define VL_SERVICE		52	/* RxRPC service ID for the Volume Location service */
 #define YFS_VL_SERVICE		2503	/* Service ID for AuriStor upgraded VL service */
+#define YFS_VL_MAXCELLNAME	256	/* Maximum length of a cell name in YFS protocol */
 
 enum AFSVL_Operations {
 	VLGETENTRYBYID		= 503,	/* AFS Get VLDB entry by ID */

diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c

@@ -253,6 +253,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
 static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
 {
 	struct afs_cell *master;
+	size_t name_len;
 	char *cell_name;
 
 	cell_name = afs_vl_get_cell_name(cell, key);
@@ -264,8 +265,11 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
 		return 0;
 	}
 
-	master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name),
-				 NULL, false);
+	name_len = strlen(cell_name);
+	if (!name_len || name_len > AFS_MAXCELLNAME)
+		master = ERR_PTR(-EOPNOTSUPP);
+	else
+		master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false);
 	kfree(cell_name);
 	if (IS_ERR(master))
 		return PTR_ERR(master);

diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c

@@ -697,7 +697,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call)
 			return ret;
 
 		namesz = ntohl(call->tmp);
-		if (namesz > AFS_MAXCELLNAME)
+		if (namesz > YFS_VL_MAXCELLNAME)
 			return afs_protocol_error(call, afs_eproto_cellname_len);
 		paddedsz = (namesz + 3) & ~3;
 		call->count = namesz;
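
The retained paddedsz line is the usual XDR rule: string payloads are padded
to a multiple of 4 bytes on the wire. A small self-contained check of the
(n + 3) & ~3 idiom:

    #include <assert.h>

    /* Round n up to the next multiple of 4, as XDR framing requires. */
    static unsigned int xdr_padded_len(unsigned int n)
    {
        return (n + 3) & ~3u;
    }

    int main(void)
    {
        assert(xdr_padded_len(0) == 0);
        assert(xdr_padded_len(5) == 8);
        assert(xdr_padded_len(8) == 8);
        return 0;
    }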

diff --git a/fs/fuse/file.c b/fs/fuse/file.c

@@ -1541,8 +1541,10 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	 */
 	struct page **pages = kzalloc(max_pages * sizeof(struct page *),
 				      GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	while (nbytes < *nbytesp && nr_pages < max_pages) {
 		unsigned nfolios, i;
@@ -1557,18 +1559,22 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 		nbytes += ret;
 
-		ret += start;
-		/* Currently, all folios in FUSE are one page */
-		nfolios = DIV_ROUND_UP(ret, PAGE_SIZE);
+		nfolios = DIV_ROUND_UP(ret + start, PAGE_SIZE);
 
-		ap->descs[ap->num_folios].offset = start;
-		fuse_folio_descs_length_init(ap->descs, ap->num_folios, nfolios);
-		for (i = 0; i < nfolios; i++)
-			ap->folios[i + ap->num_folios] = page_folio(pages[i]);
+		for (i = 0; i < nfolios; i++) {
+			struct folio *folio = page_folio(pages[i]);
+			unsigned int offset = start +
+				(folio_page_idx(folio, pages[i]) << PAGE_SHIFT);
+			unsigned int len = min_t(unsigned int, ret, PAGE_SIZE - start);
+
+			ap->descs[ap->num_folios].offset = offset;
+			ap->descs[ap->num_folios].length = len;
+			ap->folios[ap->num_folios] = folio;
+			start = 0;
+			ret -= len;
+			ap->num_folios++;
+		}
 
-		ap->num_folios += nfolios;
-		ap->descs[ap->num_folios - 1].length -=
-			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
 		nr_pages += nfolios;
 	}
 	kfree(pages);
@@ -1584,6 +1590,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	else
 		ap->args.out_pages = true;
 
+out:
 	*nbytesp = nbytes;
 
 	return ret < 0 ? ret : 0;
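
The rewritten loop gives each page its own descriptor: only the first page
carries the initial offset, later pages start at 0, and each takes
min(bytes remaining, room left in the page). A compilable userspace sketch
of that arithmetic, assuming one-page folios (the kernel loop also handles
large folios via folio_page_idx()); split_range() is illustrative only:

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    struct desc {
        unsigned int offset;
        unsigned int length;
    };

    /* Split "ret" extracted bytes beginning "start" bytes into the first
     * page into per-page descriptors, the way the fixed loop does. */
    static unsigned int split_range(unsigned int start, unsigned int ret,
                                    struct desc *descs)
    {
        unsigned int nfolios = (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;

        for (unsigned int i = 0; i < nfolios; i++) {
            unsigned int len = ret < PAGE_SIZE - start ? ret
                                                       : PAGE_SIZE - start;

            descs[i].offset = start;
            descs[i].length = len;
            start = 0;   /* only the first page is offset */
            ret -= len;  /* bytes still to place */
        }
        return nfolios;
    }

    int main(void)
    {
        struct desc d[4];
        unsigned int n = split_range(1000, 9000, d);

        for (unsigned int i = 0; i < n; i++) /* lengths 3096, 4096, 1808 */
            printf("page %u: offset=%u length=%u\n",
                   i, d[i].offset, d[i].length);
        return 0;
    }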

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c

@@ -1138,7 +1138,7 @@ static void iomap_write_delalloc_scan(struct inode *inode,
 				start_byte, end_byte, iomap, punch);
 
 		/* move offset to start of next folio in range */
-		start_byte = folio_next_index(folio) << PAGE_SHIFT;
+		start_byte = folio_pos(folio) + folio_size(folio);
 		folio_unlock(folio);
 		folio_put(folio);
 	}
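
folio_next_index() returns a pgoff_t, which is an unsigned long — 32 bits
on 32-bit kernels — so shifting it by PAGE_SHIFT wraps for file positions
at or above 4 GiB, whereas folio_pos() + folio_size() is computed in 64-bit
loff_t. A userspace demonstration of the wrap:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        /* Page index of the folio just past the 4 GiB mark. */
        uint32_t next_index = 0x100001;

        /* Shift done in 32-bit arithmetic wraps before the implicit
         * widening to 64 bits -- this is the old bug. */
        uint64_t truncated = next_index << PAGE_SHIFT;

        /* Widen first, as 64-bit loff_t arithmetic does. */
        uint64_t correct = (uint64_t)next_index << PAGE_SHIFT;

        printf("truncated=0x%llx correct=0x%llx\n",
               (unsigned long long)truncated,
               (unsigned long long)correct);
        return 0; /* prints truncated=0x1000 correct=0x100001000 */
    }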

diff --git a/fs/mount.h b/fs/mount.h

@@ -38,6 +38,7 @@ struct mount {
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
 	union {
+		struct rb_node mnt_node; /* node in the ns->mounts rbtree */
 		struct rcu_head mnt_rcu;
 		struct llist_node mnt_llist;
 	};
@@ -51,10 +52,7 @@ struct mount {
 	struct list_head mnt_child;	/* and going through their mnt_child */
 	struct list_head mnt_instance;	/* mount instance on sb->s_mounts */
 	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
-	union {
-		struct rb_node mnt_node;	/* Under ns->mounts */
-		struct list_head mnt_list;
-	};
+	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
 	struct list_head mnt_share;	/* circular list of shared mounts */
 	struct list_head mnt_slave_list;/* list of slave mounts */
@@ -145,11 +143,16 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
 	return ns->seq == 0;
 }
 
+static inline bool mnt_ns_attached(const struct mount *mnt)
+{
+	return !RB_EMPTY_NODE(&mnt->mnt_node);
+}
+
 static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
 {
-	WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
-	mnt->mnt.mnt_flags &= ~MNT_ONRB;
+	WARN_ON(!mnt_ns_attached(mnt));
 	rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
+	RB_CLEAR_NODE(&mnt->mnt_node);
 	list_add_tail(&mnt->mnt_list, dt_list);
 }
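
Instead of tracking rbtree membership with a bit in the shared mnt_flags
word (which unlocked flag updates could clobber), attachment is now derived
from the node itself: RB_CLEAR_NODE() points the node's parent pointer at
itself and RB_EMPTY_NODE() tests for that self-pointer. A standalone sketch
of the idiom (not the kernel's actual rb_node layout, which packs the parent
pointer and colour into one word):

    #include <stdbool.h>

    /* Self-pointer idiom: a detached node points at itself, so membership
     * needs no shared flags word to record it. */
    struct node {
        struct node *parent;
    };

    static void node_clear(struct node *n)
    {
        n->parent = n;          /* mark detached, like RB_CLEAR_NODE() */
    }

    static bool node_attached(const struct node *n)
    {
        return n->parent != n;  /* like !RB_EMPTY_NODE() */
    }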

diff --git a/fs/namespace.c b/fs/namespace.c

@@ -344,6 +344,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_HLIST_NODE(&mnt->mnt_mp_list);
 		INIT_LIST_HEAD(&mnt->mnt_umounting);
 		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
+		RB_CLEAR_NODE(&mnt->mnt_node);
 		mnt->mnt.mnt_idmap = &nop_mnt_idmap;
 	}
 	return mnt;
@@ -1124,7 +1125,7 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
 	struct rb_node **link = &ns->mounts.rb_node;
 	struct rb_node *parent = NULL;
 
-	WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB);
+	WARN_ON(mnt_ns_attached(mnt));
 	mnt->mnt_ns = ns;
 	while (*link) {
 		parent = *link;
@@ -1135,7 +1136,6 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
 	}
 	rb_link_node(&mnt->mnt_node, parent, link);
 	rb_insert_color(&mnt->mnt_node, &ns->mounts);
-	mnt->mnt.mnt_flags |= MNT_ONRB;
 }
 
 /*
@@ -1305,7 +1305,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	}
 
 	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
-	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB);
+	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
 
 	atomic_inc(&sb->s_active);
 	mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@@ -1763,7 +1763,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 	/* Gather the mounts to umount */
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		p->mnt.mnt_flags |= MNT_UMOUNT;
-		if (p->mnt.mnt_flags & MNT_ONRB)
+		if (mnt_ns_attached(p))
 			move_from_ns(p, &tmp_list);
 		else
 			list_move(&p->mnt_list, &tmp_list);
@@ -1912,16 +1912,14 @@ static int do_umount(struct mount *mnt, int flags)
 		event++;
 
 	if (flags & MNT_DETACH) {
-		if (mnt->mnt.mnt_flags & MNT_ONRB ||
-		    !list_empty(&mnt->mnt_list))
+		if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
 			umount_tree(mnt, UMOUNT_PROPAGATE);
 		retval = 0;
 	} else {
 		shrink_submounts(mnt);
 		retval = -EBUSY;
 		if (!propagate_mount_busy(mnt, 2)) {
-			if (mnt->mnt.mnt_flags & MNT_ONRB ||
-			    !list_empty(&mnt->mnt_list))
+			if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
 				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
 			retval = 0;
 		}

diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c

@@ -67,7 +67,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
 	 * allocate a sufficiently large bvec array and may shorten the
 	 * request.
 	 */
-	if (async || user_backed_iter(iter)) {
+	if (user_backed_iter(iter)) {
 		n = netfs_extract_user_iter(iter, len, &wreq->iter, 0);
 		if (n < 0) {
 			ret = n;
@@ -77,6 +77,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
 		wreq->direct_bv_count = n;
 		wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
 	} else {
+		/* If this is a kernel-generated async DIO request,
+		 * assume that any resources the iterator points to
+		 * (eg. a bio_vec array) will persist till the end of
+		 * the op.
+		 */
 		wreq->iter = *iter;
 	}

diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c

@@ -152,7 +152,8 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
 			BUG_ON(!len);
 
 			/* Renegotiate max_len (rsize) */
-			if (rreq->netfs_ops->prepare_read(subreq) < 0) {
+			if (rreq->netfs_ops->prepare_read &&
+			    rreq->netfs_ops->prepare_read(subreq) < 0) {
 				trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
 				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
 			}
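
The fix is the standard optional-method guard: ->prepare_read() may
legitimately be NULL for filesystems with nothing to renegotiate, so it
must be tested before the call. A minimal sketch of the pattern with
illustrative names:

    /* Treat a NULL method as success rather than calling through NULL. */
    struct read_ops {
        int (*prepare_read)(void *subreq);  /* optional */
    };

    static int maybe_prepare_read(const struct read_ops *ops, void *subreq)
    {
        return ops->prepare_read ? ops->prepare_read(subreq) : 0;
    }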

diff --git a/include/linux/mount.h b/include/linux/mount.h

@@ -50,7 +50,7 @@ struct path;
 #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
 
 #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
-			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
+			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
 
 #define MNT_INTERNAL	0x4000
@@ -64,7 +64,6 @@ struct path;
 #define MNT_SYNC_UMOUNT		0x2000000
 #define MNT_MARKED		0x4000000
 #define MNT_UMOUNT		0x8000000
-#define MNT_ONRB		0x10000000
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */

diff --git a/include/linux/poll.h b/include/linux/poll.h

@@ -25,14 +25,14 @@
 struct poll_table_struct;
 
 /*
  * structures and helpers for f_op->poll implementations
  */
 typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
 
 /*
- * Do not touch the structure directly, use the access functions
- * poll_does_not_wait() and poll_requested_events() instead.
+ * Do not touch the structure directly, use the access function
+ * poll_requested_events() instead.
  */
 typedef struct poll_table_struct {
 	poll_queue_proc _qproc;
@@ -41,18 +41,16 @@ typedef struct poll_table_struct {
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
-	if (p && p->_qproc && wait_address)
+	if (p && p->_qproc) {
 		p->_qproc(filp, wait_address, p);
-}
-
-/*
- * Return true if it is guaranteed that poll will not wait. This is the case
- * if the poll() of another file descriptor in the set got an event, so there
- * is no need for waiting.
- */
-static inline bool poll_does_not_wait(const poll_table *p)
-{
-	return p == NULL || p->_qproc == NULL;
+		/*
+		 * This memory barrier is paired in the wq_has_sleeper().
+		 * See the comment above prepare_to_wait(), we need to
+		 * ensure that subsequent tests in this thread can't be
+		 * reordered with __add_wait_queue() in _qproc() paths.
+		 */
+		smp_mb();
+	}
 }
 
 /*
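
The new barrier pairs with the one in wq_has_sleeper()/waitqueue_active()
on the wakeup side: the waiter queues itself, issues a full barrier, then
tests readiness; the waker sets readiness, issues a full barrier, then
tests for sleepers. With both barriers in place, at least one side must
observe the other's store, so a wakeup cannot be lost. A compilable C11
analogue of the pairing (names are illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool on_queue; /* stands in for __add_wait_queue() */
    static atomic_bool ready;    /* stands in for the .poll() readiness test */

    /* Waiter: enqueue, full barrier (the new smp_mb()), test readiness. */
    static bool waiter_must_sleep(void)
    {
        atomic_store_explicit(&on_queue, true, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return !atomic_load_explicit(&ready, memory_order_relaxed);
    }

    /* Waker: publish readiness, full barrier (as in wq_has_sleeper()),
     * test for sleepers. Given both fences, "waiter sleeps" and "waker
     * skips the wakeup" cannot both happen. */
    static bool waker_sees_sleeper(void)
    {
        atomic_store_explicit(&ready, true, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&on_queue, memory_order_relaxed);
    }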

diff --git a/include/net/sock.h b/include/net/sock.h

@@ -2297,7 +2297,7 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 }
 
 /**
- * sock_poll_wait - place memory barrier behind the poll_wait call.
+ * sock_poll_wait - wrapper for the poll_wait call.
  * @filp:           file
  * @sock:           socket to wait on
  * @p:              poll_table
@@ -2307,15 +2307,12 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 				  poll_table *p)
 {
-	if (!poll_does_not_wait(p)) {
-		poll_wait(filp, &sock->wq.wait, p);
-		/* We need to be sure we are in sync with the
-		 * socket flags modification.
-		 *
-		 * This memory barrier is paired in the wq_has_sleeper.
-		 */
-		smp_mb();
-	}
+	/* Provides a barrier we need to be sure we are in sync
+	 * with the socket flags modification.
+	 *
+	 * This memory barrier is paired in the wq_has_sleeper.
+	 */
+	poll_wait(filp, &sock->wq.wait, p);
 }
 
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

@@ -2813,13 +2813,12 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	if (unlikely(!ctx->poll_activated))
 		io_activate_pollwq(ctx);
-
-	poll_wait(file, &ctx->poll_wq, wait);
 	/*
-	 * synchronizes with barrier from wq_has_sleeper call in
-	 * io_commit_cqring
+	 * provides mb() which pairs with barrier from wq_has_sleeper
+	 * call in io_commit_cqring
 	 */
-	smp_rmb();
+	poll_wait(file, &ctx->poll_wq, wait);
+
 	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;