diff --git a/fs/afs/addr_prefs.c b/fs/afs/addr_prefs.c index a189ff8a5034..c0384201b8fe 100644 --- a/fs/afs/addr_prefs.c +++ b/fs/afs/addr_prefs.c @@ -413,8 +413,10 @@ int afs_proc_addr_prefs_write(struct file *file, char *buf, size_t size) do { argc = afs_split_string(&buf, argv, ARRAY_SIZE(argv)); - if (argc < 0) - return argc; + if (argc < 0) { + ret = argc; + goto done; + } if (argc < 2) goto inval; diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b488072aee87..ec3db00bd081 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -10,7 +10,7 @@ #include -#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */ +#define AFS_MAXCELLNAME 253 /* Maximum length of a cell name (DNS limited) */ #define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ #define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ #define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index a06296c8827d..b835e25a2c02 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -13,6 +13,7 @@ #define AFS_VL_PORT 7003 /* volume location service port */ #define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */ #define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */ +#define YFS_VL_MAXCELLNAME 256 /* Maximum length of a cell name in YFS protocol */ enum AFSVL_Operations { VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */ diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 9f36e14f1c2d..f9e76b604f31 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -253,6 +253,7 @@ static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key) static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) { struct afs_cell *master; + size_t name_len; char *cell_name; cell_name = afs_vl_get_cell_name(cell, key); @@ -264,8 +265,11 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) return 0; } - master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name), - NULL, false); + name_len = strlen(cell_name); + if (!name_len || name_len > AFS_MAXCELLNAME) + master = ERR_PTR(-EOPNOTSUPP); + else + master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false); kfree(cell_name); if (IS_ERR(master)) return PTR_ERR(master); diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index cac75f89b64a..55dd0fc5aad7 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -697,7 +697,7 @@ static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call) return ret; namesz = ntohl(call->tmp); - if (namesz > AFS_MAXCELLNAME) + if (namesz > YFS_VL_MAXCELLNAME) return afs_protocol_error(call, afs_eproto_cellname_len); paddedsz = (namesz + 3) & ~3; call->count = namesz; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 88d0946b5bc9..7d92a5479998 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1541,8 +1541,10 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, */ struct page **pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (!pages) { + ret = -ENOMEM; + goto out; + } while (nbytes < *nbytesp && nr_pages < max_pages) { unsigned nfolios, i; @@ -1557,18 +1559,22 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, nbytes += ret; - ret += start; - /* Currently, all folios in FUSE are one page */ - nfolios = DIV_ROUND_UP(ret, PAGE_SIZE); + nfolios = DIV_ROUND_UP(ret + start, PAGE_SIZE); - ap->descs[ap->num_folios].offset = start; - fuse_folio_descs_length_init(ap->descs, ap->num_folios, nfolios); - for (i = 0; i < nfolios; i++) - ap->folios[i + ap->num_folios] = page_folio(pages[i]); + for (i = 0; i < nfolios; i++) { + struct folio *folio = page_folio(pages[i]); + unsigned int offset = start + + (folio_page_idx(folio, pages[i]) << PAGE_SHIFT); + unsigned int len = min_t(unsigned int, ret, PAGE_SIZE - start); + + ap->descs[ap->num_folios].offset = offset; + ap->descs[ap->num_folios].length = len; + ap->folios[ap->num_folios] = folio; + start = 0; + ret -= len; + ap->num_folios++; + } - ap->num_folios += nfolios; - ap->descs[ap->num_folios - 1].length -= - (PAGE_SIZE - ret) & (PAGE_SIZE - 1); nr_pages += nfolios; } kfree(pages); @@ -1584,6 +1590,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, else ap->args.out_pages = true; +out: *nbytesp = nbytes; return ret < 0 ? ret : 0; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 54dc27d92781..d303e6c8900c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1138,7 +1138,7 @@ static void iomap_write_delalloc_scan(struct inode *inode, start_byte, end_byte, iomap, punch); /* move offset to start of next folio in range */ - start_byte = folio_next_index(folio) << PAGE_SHIFT; + start_byte = folio_pos(folio) + folio_size(folio); folio_unlock(folio); folio_put(folio); } diff --git a/fs/mount.h b/fs/mount.h index 185fc56afc13..179f690a0c72 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -38,6 +38,7 @@ struct mount { struct dentry *mnt_mountpoint; struct vfsmount mnt; union { + struct rb_node mnt_node; /* node in the ns->mounts rbtree */ struct rcu_head mnt_rcu; struct llist_node mnt_llist; }; @@ -51,10 +52,7 @@ struct mount { struct list_head mnt_child; /* and going through their mnt_child */ struct list_head mnt_instance; /* mount instance on sb->s_mounts */ const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ - union { - struct rb_node mnt_node; /* Under ns->mounts */ - struct list_head mnt_list; - }; + struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_share; /* circular list of shared mounts */ struct list_head mnt_slave_list;/* list of slave mounts */ @@ -145,11 +143,16 @@ static inline bool is_anon_ns(struct mnt_namespace *ns) return ns->seq == 0; } +static inline bool mnt_ns_attached(const struct mount *mnt) +{ + return !RB_EMPTY_NODE(&mnt->mnt_node); +} + static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) { - WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB)); - mnt->mnt.mnt_flags &= ~MNT_ONRB; + WARN_ON(!mnt_ns_attached(mnt)); rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts); + RB_CLEAR_NODE(&mnt->mnt_node); list_add_tail(&mnt->mnt_list, dt_list); } diff --git a/fs/namespace.c b/fs/namespace.c index 6eec7794f707..eac057e56948 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -344,6 +344,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_HLIST_NODE(&mnt->mnt_mp_list); INIT_LIST_HEAD(&mnt->mnt_umounting); INIT_HLIST_HEAD(&mnt->mnt_stuck_children); + RB_CLEAR_NODE(&mnt->mnt_node); mnt->mnt.mnt_idmap = &nop_mnt_idmap; } return mnt; @@ -1124,7 +1125,7 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) struct rb_node **link = &ns->mounts.rb_node; struct rb_node *parent = NULL; - WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB); + WARN_ON(mnt_ns_attached(mnt)); mnt->mnt_ns = ns; while (*link) { parent = *link; @@ -1135,7 +1136,6 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) } rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); - mnt->mnt.mnt_flags |= MNT_ONRB; } /* @@ -1305,7 +1305,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags; - mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB); + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); atomic_inc(&sb->s_active); mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); @@ -1763,7 +1763,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) /* Gather the mounts to umount */ for (p = mnt; p; p = next_mnt(p, mnt)) { p->mnt.mnt_flags |= MNT_UMOUNT; - if (p->mnt.mnt_flags & MNT_ONRB) + if (mnt_ns_attached(p)) move_from_ns(p, &tmp_list); else list_move(&p->mnt_list, &tmp_list); @@ -1912,16 +1912,14 @@ static int do_umount(struct mount *mnt, int flags) event++; if (flags & MNT_DETACH) { - if (mnt->mnt.mnt_flags & MNT_ONRB || - !list_empty(&mnt->mnt_list)) + if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { - if (mnt->mnt.mnt_flags & MNT_ONRB || - !list_empty(&mnt->mnt_list)) + if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); retval = 0; } diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 173e8b5e6a93..f9421f3e6d37 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -67,7 +67,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * * allocate a sufficiently large bvec array and may shorten the * request. */ - if (async || user_backed_iter(iter)) { + if (user_backed_iter(iter)) { n = netfs_extract_user_iter(iter, len, &wreq->iter, 0); if (n < 0) { ret = n; @@ -77,6 +77,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * wreq->direct_bv_count = n; wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); } else { + /* If this is a kernel-generated async DIO request, + * assume that any resources the iterator points to + * (eg. a bio_vec array) will persist till the end of + * the op. + */ wreq->iter = *iter; } diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 21b4a54e545e..16b676c68dcd 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -152,7 +152,8 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) BUG_ON(!len); /* Renegotiate max_len (rsize) */ - if (rreq->netfs_ops->prepare_read(subreq) < 0) { + if (rreq->netfs_ops->prepare_read && + rreq->netfs_ops->prepare_read(subreq) < 0) { trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed); __set_bit(NETFS_SREQ_FAILED, &subreq->flags); } diff --git a/include/linux/mount.h b/include/linux/mount.h index c34c18b4e8f3..04213d8ef837 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -50,7 +50,7 @@ struct path; #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB) + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 @@ -64,7 +64,6 @@ struct path; #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 -#define MNT_ONRB 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/poll.h b/include/linux/poll.h index d1ea4f3714a8..12bb18e8b978 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -25,14 +25,14 @@ struct poll_table_struct; -/* +/* * structures and helpers for f_op->poll implementations */ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); /* - * Do not touch the structure directly, use the access functions - * poll_does_not_wait() and poll_requested_events() instead. + * Do not touch the structure directly, use the access function + * poll_requested_events() instead. */ typedef struct poll_table_struct { poll_queue_proc _qproc; @@ -41,18 +41,16 @@ typedef struct poll_table_struct { static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { - if (p && p->_qproc && wait_address) + if (p && p->_qproc) { p->_qproc(filp, wait_address, p); -} - -/* - * Return true if it is guaranteed that poll will not wait. This is the case - * if the poll() of another file descriptor in the set got an event, so there - * is no need for waiting. - */ -static inline bool poll_does_not_wait(const poll_table *p) -{ - return p == NULL || p->_qproc == NULL; + /* + * This memory barrier is paired in the wq_has_sleeper(). + * See the comment above prepare_to_wait(), we need to + * ensure that subsequent tests in this thread can't be + * reordered with __add_wait_queue() in _qproc() paths. + */ + smp_mb(); + } } /* diff --git a/include/net/sock.h b/include/net/sock.h index c383126f691d..691ca7695d1d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2297,7 +2297,7 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) } /** - * sock_poll_wait - place memory barrier behind the poll_wait call. + * sock_poll_wait - wrapper for the poll_wait call. * @filp: file * @sock: socket to wait on * @p: poll_table @@ -2307,15 +2307,12 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) { - if (!poll_does_not_wait(p)) { - poll_wait(filp, &sock->wq.wait, p); - /* We need to be sure we are in sync with the - * socket flags modification. - * - * This memory barrier is paired in the wq_has_sleeper. - */ - smp_mb(); - } + /* Provides a barrier we need to be sure we are in sync + * with the socket flags modification. + * + * This memory barrier is paired in the wq_has_sleeper. + */ + poll_wait(filp, &sock->wq.wait, p); } static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ff691f37462c..ab45a84f4e2d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2813,13 +2813,12 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (unlikely(!ctx->poll_activated)) io_activate_pollwq(ctx); - - poll_wait(file, &ctx->poll_wq, wait); /* - * synchronizes with barrier from wq_has_sleeper call in - * io_commit_cqring + * provides mb() which pairs with barrier from wq_has_sleeper + * call in io_commit_cqring */ - smp_rmb(); + poll_wait(file, &ctx->poll_wq, wait); + if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM;