vfs-6.12-rc5.fixes

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZxY6XAAKCRCRxhvAZXjc
 opmUAQCu4KhzBBdZmFw3AfZFNJvYb1onT4FiU0pnyGgfvzEdEwD6AlnlgQ7DL3ZN
 WBqBzUl+DpGYJfzhkqoEGH89Fagx7QM=
 =mm68
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "afs:
   - Fix a lock recursion in afs_wake_up_async_call() on ->notify_lock

 netfs:
   - Drop the references to a folio immediately after the folio has been
     extracted to prevent races with future I/O collection

   - Fix a documentation build error

   - Downgrade the i_rwsem for buffered writes to fix a cifs reported
     performance regression when switching to netfslib

  vfs:
   - Explicitly return -E2BIG from openat2() if the specified size is
     unexpectedly large. This aligns openat2() with other extensible
     struct based system calls

   - When copying a mount namespace ensure that we only try to remove
     the new copy from the mount namespace rbtree if it has already been
     added to it

  nilfs:
   - Clear the buffer delay flag when clearing the buffer state flags
     when a buffer head is discarded to prevent a kernel oops

  ocfs2:
   - Fix an uninitialized value warning in ocfs2_setattr()

  proc:
   - Fix a kernel doc warning"

* tag 'vfs-6.12-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  proc: Fix W=1 build kernel-doc warning
  afs: Fix lock recursion
  fs: Fix uninitialized value issue in from_kuid and from_kgid
  fs: don't try and remove empty rbtree node
  netfs: Downgrade i_rwsem for a buffered write
  nilfs2: fix kernel bug due to missing clearing of buffer delay flag
  openat2: explicitly return -E2BIG for (usize > PAGE_SIZE)
  netfs: fix documentation build error
  netfs: In readahead, put the folio refs as soon extracted
This commit is contained in:
Linus Torvalds 2024-10-21 10:48:24 -07:00
commit 7166c32651
12 changed files with 95 additions and 67 deletions

View File

@ -592,4 +592,3 @@ API Function Reference
.. kernel-doc:: include/linux/netfs.h
.. kernel-doc:: fs/netfs/buffered_read.c
.. kernel-doc:: fs/netfs/io.c

View File

@ -130,6 +130,7 @@ struct afs_call {
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
struct work_struct free_work; /* Deferred free processor */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* security for this call */
@ -1331,6 +1332,7 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
void afs_deferred_put_call(struct afs_call *call);
void afs_make_call(struct afs_call *call, gfp_t gfp);
void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,

View File

@ -18,6 +18,7 @@
struct workqueue_struct *afs_async_calls;
static void afs_deferred_free_worker(struct work_struct *work);
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_process_async_call(struct work_struct *);
@ -149,6 +150,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
call->iter = &call->def_iter;
@ -159,6 +161,36 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
return call;
}
/*
 * Tear down and free a call record: release the peer, the rxrpc call (if
 * any), the server usage and the request buffer, then free the call itself
 * and drop the network namespace's outstanding-call count.
 */
static void afs_free_call(struct afs_call *call)
{
/* Cache the net pointer; it is still needed after the call is freed. */
struct afs_net *net = call->net;
int o;
/* The async work item must not still be queued at this point. */
ASSERT(!work_pending(&call->async_work));
rxrpc_kernel_put_peer(call->peer);
/* Shut down and release the underlying rxrpc call if one is attached. */
if (call->rxcall) {
rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
rxrpc_kernel_put_call(net->socket, call->rxcall);
call->rxcall = NULL;
}
/* Give the call type a chance to run its own destructor, if it has one. */
if (call->type->destructor)
call->type->destructor(call);
afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
kfree(call->request);
/* Sample the outstanding-call count for the trace record only. */
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
__builtin_return_address(0));
kfree(call);
/* The call is gone; use the cached net pointer and wake any waiter on zero. */
o = atomic_dec_return(&net->nr_outstanding_calls);
if (o == 0)
wake_up_var(&net->nr_outstanding_calls);
}
/*
* Dispose of a reference on a call.
*/
@ -173,32 +205,34 @@ void afs_put_call(struct afs_call *call)
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
if (zero)
afs_free_call(call);
}
if (zero) {
ASSERT(!work_pending(&call->async_work));
ASSERT(call->type->name != NULL);
static void afs_deferred_free_worker(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, free_work);
rxrpc_kernel_put_peer(call->peer);
afs_free_call(call);
}
if (call->rxcall) {
rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
rxrpc_kernel_put_call(net->socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
call->type->destructor(call);
/*
* Dispose of a reference on a call, deferring the cleanup to a workqueue
* to avoid lock recursion.
*/
void afs_deferred_put_call(struct afs_call *call)
{
struct afs_net *net = call->net;
unsigned int debug_id = call->debug_id;
bool zero;
int r, o;
afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call);
kfree(call->request);
trace_afs_call(call->debug_id, afs_call_trace_free, 0, o,
zero = __refcount_dec_and_test(&call->ref, &r);
o = atomic_read(&net->nr_outstanding_calls);
trace_afs_call(debug_id, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
kfree(call);
o = atomic_dec_return(&net->nr_outstanding_calls);
if (o == 0)
wake_up_var(&net->nr_outstanding_calls);
}
if (zero)
schedule_work(&call->free_work);
}
static struct afs_call *afs_get_call(struct afs_call *call,
@ -640,7 +674,8 @@ static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
* wake up an asynchronous call
* Wake up an asynchronous call. The caller is holding the call notify
* spinlock around this, so we can't call afs_put_call().
*/
static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID)
@ -657,7 +692,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
__builtin_return_address(0));
if (!queue_work(afs_async_calls, &call->async_work))
afs_put_call(call);
afs_deferred_put_call(call);
}
}

View File

@ -3944,7 +3944,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
free_mnt_ns(new_ns);
ns_free_inum(&new_ns->ns);
dec_mnt_namespaces(new_ns->ucounts);
mnt_ns_release(new_ns);
return ERR_CAST(new);
}
if (user_ns != ns->user_ns) {

View File

@ -67,7 +67,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
* Decant the list of folios to read into a rolling buffer.
*/
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
struct folio_queue *folioq)
struct folio_queue *folioq,
struct folio_batch *put_batch)
{
unsigned int order, nr;
size_t size = 0;
@ -82,6 +83,9 @@ static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
order = folio_order(folio);
folioq->orders[i] = order;
size += PAGE_SIZE << order;
if (!folio_batch_add(put_batch, folio))
folio_batch_release(put_batch);
}
for (int i = nr; i < folioq_nr_slots(folioq); i++)
@ -120,6 +124,9 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
* that we will need to release later - but we don't want to do
* that until after we've started the I/O.
*/
struct folio_batch put_batch;
folio_batch_init(&put_batch);
while (rreq->submitted < subreq->start + rsize) {
struct folio_queue *tail = rreq->buffer_tail, *new;
size_t added;
@ -132,10 +139,11 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
new->prev = tail;
tail->next = new;
rreq->buffer_tail = new;
added = netfs_load_buffer_from_ra(rreq, new);
added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
rreq->iter.count += added;
rreq->submitted += added;
}
folio_batch_release(&put_batch);
}
subreq->len = rsize;
@ -348,6 +356,7 @@ static int netfs_wait_for_read(struct netfs_io_request *rreq)
static int netfs_prime_buffer(struct netfs_io_request *rreq)
{
struct folio_queue *folioq;
struct folio_batch put_batch;
size_t added;
folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
@ -360,39 +369,14 @@ static int netfs_prime_buffer(struct netfs_io_request *rreq)
rreq->submitted = rreq->start;
iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
added = netfs_load_buffer_from_ra(rreq, folioq);
folio_batch_init(&put_batch);
added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
folio_batch_release(&put_batch);
rreq->iter.count += added;
rreq->submitted += added;
return 0;
}
/*
* Drop the ref on each folio that we inherited from the VM readahead code. We
* still have the folio locks to pin the page until we complete the I/O.
*
* Note that we can't just release the batch in each queue struct as we use the
* occupancy count in other places.
*/
static void netfs_put_ra_refs(struct folio_queue *folioq)
{
/* Local batch used to collect folios before releasing them together. */
struct folio_batch fbatch;
folio_batch_init(&fbatch);
/* Walk every segment of the folio queue chain via ->next. */
while (folioq) {
for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) {
struct folio *folio = folioq_folio(folioq, slot);
/* Skip slots that hold no folio. */
if (!folio)
continue;
trace_netfs_folio(folio, netfs_folio_trace_read_put);
/*
 * Release the batch when folio_batch_add() returns zero
 * (presumably no space left in the batch - TODO confirm).
 */
if (!folio_batch_add(&fbatch, folio))
folio_batch_release(&fbatch);
}
folioq = folioq->next;
}
/* Release whatever is still held in the batch. */
folio_batch_release(&fbatch);
}
/**
* netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request
@ -436,9 +420,6 @@ void netfs_readahead(struct readahead_control *ractl)
goto cleanup_free;
netfs_read_to_pagecache(rreq);
/* Release the folio refs whilst we're waiting for the I/O. */
netfs_put_ra_refs(rreq->buffer);
netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
return;

View File

@ -109,6 +109,7 @@ int netfs_start_io_write(struct inode *inode)
up_write(&inode->i_rwsem);
return -ERESTARTSYS;
}
downgrade_write(&inode->i_rwsem);
return 0;
}
EXPORT_SYMBOL(netfs_start_io_write);
@ -123,7 +124,7 @@ EXPORT_SYMBOL(netfs_start_io_write);
void netfs_end_io_write(struct inode *inode)
__releases(inode->i_rwsem)
{
up_write(&inode->i_rwsem);
up_read(&inode->i_rwsem);
}
EXPORT_SYMBOL(netfs_end_io_write);

View File

@ -77,6 +77,8 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio);
}
}
folioq_clear(folioq, slot);
}
/*

View File

@ -77,7 +77,8 @@ void nilfs_forget_buffer(struct buffer_head *bh)
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
BIT(BH_Delay));
lock_buffer(bh);
set_mask_bits(&bh->b_state, clear_bits, 0);
@ -406,7 +407,8 @@ void nilfs_clear_folio_dirty(struct folio *folio)
const unsigned long clear_bits =
(BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) |
BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected));
BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) |
BIT(BH_Delay));
bh = head;
do {

View File

@ -1129,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
dentry->d_name.len, dentry->d_name.name,
attr->ia_valid, attr->ia_mode,
from_kuid(&init_user_ns, attr->ia_uid),
from_kgid(&init_user_ns, attr->ia_gid));
attr->ia_valid,
attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
attr->ia_valid & ATTR_UID ?
from_kuid(&init_user_ns, attr->ia_uid) : 0,
attr->ia_valid & ATTR_GID ?
from_kgid(&init_user_ns, attr->ia_gid) : 0);
/* ensuring we don't even attempt to truncate a symlink */
if (S_ISLNK(inode->i_mode))

View File

@ -1457,6 +1457,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
if (unlikely(usize < OPEN_HOW_SIZE_VER0))
return -EINVAL;
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
if (err)

View File

@ -77,7 +77,7 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
return single_open(file, seq_show, inode);
}
/**
/*
* Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure
* that the current task has PTRACE_MODE_READ in addition to the normal
* POSIX-like checks.

View File

@ -172,7 +172,6 @@
EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \
EM(netfs_folio_trace_read_gaps, "read-gaps") \
EM(netfs_folio_trace_read_put, "read-put") \
EM(netfs_folio_trace_read_unlock, "read-unlock") \
EM(netfs_folio_trace_redirtied, "redirtied") \
EM(netfs_folio_trace_store, "store") \