netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/* Internal definitions for network filesystem support
|
|
|
|
*
|
|
|
|
* Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
|
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
*/
|
|
|
|
|
2023-11-20 15:29:09 +00:00
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/seq_file.h>
|
2024-05-29 21:47:07 +01:00
|
|
|
#include <linux/folio_queue.h>
|
2022-02-17 13:30:38 +00:00
|
|
|
#include <linux/netfs.h>
|
2021-06-29 22:37:05 +01:00
|
|
|
#include <linux/fscache.h>
|
2023-11-20 15:55:18 +00:00
|
|
|
#include <linux/fscache-cache.h>
|
2022-02-17 13:30:38 +00:00
|
|
|
#include <trace/events/netfs.h>
|
2023-11-20 15:55:18 +00:00
|
|
|
#include <trace/events/fscache.h>
|
2022-02-17 13:30:38 +00:00
|
|
|
|
netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
#ifdef pr_fmt
|
|
|
|
#undef pr_fmt
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define pr_fmt(fmt) "netfs: " fmt
|
|
|
|
|
2022-03-01 15:55:15 +00:00
|
|
|
/*
|
|
|
|
* buffered_read.c
|
|
|
|
*/
|
2021-06-17 13:09:21 +01:00
|
|
|
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
|
|
|
size_t offset, size_t len);
|
2022-03-01 15:55:15 +00:00
|
|
|
|
2022-03-01 15:25:00 +00:00
|
|
|
/*
|
|
|
|
* main.c
|
|
|
|
*/
|
2024-07-18 21:07:32 +01:00
|
|
|
extern unsigned int netfs_debug;
|
2022-03-04 10:34:27 +00:00
|
|
|
extern struct list_head netfs_io_requests;
|
|
|
|
extern spinlock_t netfs_proc_lock;
|
2024-03-15 14:37:18 +00:00
|
|
|
extern mempool_t netfs_request_pool;
|
|
|
|
extern mempool_t netfs_subrequest_pool;
|
2022-03-04 10:34:27 +00:00
|
|
|
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq)
|
|
|
|
{
|
|
|
|
spin_lock(&netfs_proc_lock);
|
|
|
|
list_add_tail_rcu(&rreq->proc_link, &netfs_io_requests);
|
|
|
|
spin_unlock(&netfs_proc_lock);
|
|
|
|
}
|
|
|
|
static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq)
|
|
|
|
{
|
|
|
|
if (!list_empty(&rreq->proc_link)) {
|
|
|
|
spin_lock(&netfs_proc_lock);
|
|
|
|
list_del_rcu(&rreq->proc_link);
|
|
|
|
spin_unlock(&netfs_proc_lock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {}
|
|
|
|
static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
|
|
|
|
#endif
|
2022-03-01 15:25:00 +00:00
|
|
|
|
2023-09-22 13:25:22 +01:00
|
|
|
/*
|
|
|
|
* misc.c
|
|
|
|
*/
|
netfs: Fix write oops in generic/346 (9p) and generic/074 (cifs)
In netfslib, a buffered writeback operation has a 'write queue' of folios
that are being written, held in a linear sequence of folio_queue structs.
The 'issuer' adds new folio_queues on the leading edge of the queue and
populates each one progressively; the 'collector' pops them off the
trailing edge and discards them and the folios they point to as they are
consumed.
The queue is required to always retain at least one folio_queue structure.
This allows the queue to be accessed without locking and with just a bit of
barriering.
When a new subrequest is prepared, its ->io_iter iterator is pointed at the
current end of the write queue and then the iterator is extended as more
data is added to the queue until the subrequest is committed.
Now, the problem is that the folio_queue at the leading edge of the write
queue when a subrequest is prepared might have been entirely consumed - but
not yet removed from the queue as it is the only remaining one and is
preventing the queue from collapsing.
So, what happens is that subreq->io_iter is pointed at the spent
folio_queue, then a new folio_queue is added, and, at that point, the
collector is entirely at liberty to immediately delete the spent
folio_queue.
This leaves the subreq->io_iter pointing at a freed object. If the system
is lucky, iterate_folioq() sees ->io_iter, sees the as-yet uncorrupted
freed object and advances to the next folio_queue in the queue.
In the case seen, however, the freed object gets recycled and put back onto
the queue at the tail and filled to the end. This confuses
iterate_folioq() and it tries to step ->next, which may be NULL - resulting
in an oops.
Fix this by the following means:
(1) When preparing a write subrequest, make sure there's a folio_queue
struct with space in it at the leading edge of the queue. A function
to make space is split out of the function to append a folio so that
it can be called for this purpose.
(2) If the request struct iterator is pointing to a completely spent
folio_queue when we make space, then advance the iterator to the newly
allocated folio_queue. The subrequest's iterator will then be set
from this.
The oops could be triggered using the generic/346 xfstest with a filesystem
on 9P over TCP with cache=loose. The oops looked something like:
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
...
RIP: 0010:_copy_from_iter+0x2db/0x530
...
Call Trace:
<TASK>
...
p9pdu_vwritef+0x3d8/0x5d0
p9_client_prepare_req+0xa8/0x140
p9_client_rpc+0x81/0x280
p9_client_write+0xcf/0x1c0
v9fs_issue_write+0x87/0xc0
netfs_advance_write+0xa0/0xb0
netfs_write_folio.isra.0+0x42d/0x500
netfs_writepages+0x15a/0x1f0
do_writepages+0xd1/0x220
filemap_fdatawrite_wbc+0x5c/0x80
v9fs_mmap_vm_close+0x7d/0xb0
remove_vma+0x35/0x70
vms_complete_munmap_vmas+0x11a/0x170
do_vmi_align_munmap+0x17d/0x1c0
do_vmi_munmap+0x13e/0x150
__vm_munmap+0x92/0xd0
__x64_sys_munmap+0x17/0x20
do_syscall_64+0x80/0xe0
entry_SYSCALL_64_after_hwframe+0x71/0x79
This also fixed a similar-looking issue with cifs and generic/074.
Fixes: cd0277ed0c18 ("netfs: Use new folio_queue data type and iterator instead of xarray iter")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202409180928.f20b5a08-oliver.sang@intel.com
Closes: https://lore.kernel.org/oe-lkp/202409131438.3f225fbf-oliver.sang@intel.com
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: kernel test robot <oliver.sang@intel.com>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: v9fs@lists.linux.dev
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2024-09-26 14:58:30 +01:00
|
|
|
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq);
|
2024-05-29 21:47:07 +01:00
|
|
|
int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
|
|
|
|
bool needs_put);
|
|
|
|
struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
|
|
|
|
void netfs_clear_buffer(struct netfs_io_request *rreq);
|
2024-07-08 14:49:45 +01:00
|
|
|
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
|
2023-09-22 13:25:22 +01:00
|
|
|
|
2022-02-17 13:30:38 +00:00
|
|
|
/*
|
|
|
|
* objects.c
|
|
|
|
*/
|
2021-08-26 09:24:42 -04:00
|
|
|
struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
|
|
|
|
struct file *file,
|
|
|
|
loff_t start, size_t len,
|
|
|
|
enum netfs_io_origin origin);
|
2022-02-17 21:13:05 +00:00
|
|
|
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
|
2022-02-17 13:30:38 +00:00
|
|
|
void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async);
|
2022-02-17 21:13:05 +00:00
|
|
|
void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
|
|
|
|
enum netfs_rreq_ref_trace what);
|
2022-02-17 13:30:38 +00:00
|
|
|
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
|
|
|
|
|
2022-02-17 21:13:05 +00:00
|
|
|
static inline void netfs_see_request(struct netfs_io_request *rreq,
|
|
|
|
enum netfs_rreq_ref_trace what)
|
|
|
|
{
|
|
|
|
trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
|
|
|
|
}
|
|
|
|
|
2024-07-02 00:40:22 +01:00
|
|
|
/*
|
|
|
|
* read_collect.c
|
|
|
|
*/
|
|
|
|
void netfs_read_termination_worker(struct work_struct *work);
|
|
|
|
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* read_pgpriv2.c
|
|
|
|
*/
|
|
|
|
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
|
|
|
|
struct netfs_io_request *rreq,
|
|
|
|
struct folio_queue *folioq,
|
|
|
|
int slot);
|
|
|
|
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
|
|
|
|
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* read_retry.c
|
|
|
|
*/
|
|
|
|
void netfs_retry_reads(struct netfs_io_request *rreq);
|
|
|
|
void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);
|
|
|
|
|
2020-11-03 11:32:41 +00:00
|
|
|
/*
|
|
|
|
* stats.c
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_NETFS_STATS
|
netfs: Implement unbuffered/DIO read support
Implement support for unbuffered and DIO reads in the netfs library,
utilising the existing read helper code to do block splitting and
individual queuing. The code also handles extraction of the destination
buffer from the supplied iterator, allowing async unbuffered reads to take
place.
The read will be split up according to the rsize setting and, if supplied,
the ->clamp_length() method. Note that the next subrequest will be issued
as soon as issue_op returns, without waiting for previous ones to finish.
The network filesystem needs to pause or handle queuing them if it doesn't
want to fire them all at the server simultaneously.
Once all the subrequests have finished, the state will be assessed and the
amount of data to be indicated as having been obtained will be
determined. As the subrequests may finish in any order, if an intermediate
subrequest is short, any further subrequests may be copied into the buffer
and then abandoned.
In the future, this will also take care of doing an unbuffered read from
encrypted content, with the decryption being done by the library.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
2022-01-14 17:39:55 +00:00
|
|
|
extern atomic_t netfs_n_rh_dio_read;
|
2020-11-03 11:32:41 +00:00
|
|
|
extern atomic_t netfs_n_rh_readahead;
|
2024-03-26 08:48:44 +00:00
|
|
|
extern atomic_t netfs_n_rh_read_folio;
|
2020-11-03 11:32:41 +00:00
|
|
|
extern atomic_t netfs_n_rh_rreq;
|
|
|
|
extern atomic_t netfs_n_rh_sreq;
|
|
|
|
extern atomic_t netfs_n_rh_download;
|
|
|
|
extern atomic_t netfs_n_rh_download_done;
|
|
|
|
extern atomic_t netfs_n_rh_download_failed;
|
|
|
|
extern atomic_t netfs_n_rh_download_instead;
|
|
|
|
extern atomic_t netfs_n_rh_read;
|
|
|
|
extern atomic_t netfs_n_rh_read_done;
|
|
|
|
extern atomic_t netfs_n_rh_read_failed;
|
|
|
|
extern atomic_t netfs_n_rh_zero;
|
|
|
|
extern atomic_t netfs_n_rh_short_read;
|
|
|
|
extern atomic_t netfs_n_rh_write;
|
2020-09-22 11:06:07 +01:00
|
|
|
extern atomic_t netfs_n_rh_write_begin;
|
2020-11-03 11:32:41 +00:00
|
|
|
extern atomic_t netfs_n_rh_write_done;
|
|
|
|
extern atomic_t netfs_n_rh_write_failed;
|
2020-09-22 11:06:07 +01:00
|
|
|
extern atomic_t netfs_n_rh_write_zskip;
|
2024-03-26 08:48:44 +00:00
|
|
|
extern atomic_t netfs_n_wh_buffered_write;
|
|
|
|
extern atomic_t netfs_n_wh_writethrough;
|
|
|
|
extern atomic_t netfs_n_wh_dio_write;
|
|
|
|
extern atomic_t netfs_n_wh_writepages;
|
2024-07-02 00:40:22 +01:00
|
|
|
extern atomic_t netfs_n_wh_copy_to_cache;
|
2024-01-04 15:52:11 +00:00
|
|
|
extern atomic_t netfs_n_wh_wstream_conflict;
|
2022-02-09 19:52:13 +00:00
|
|
|
extern atomic_t netfs_n_wh_upload;
|
|
|
|
extern atomic_t netfs_n_wh_upload_done;
|
|
|
|
extern atomic_t netfs_n_wh_upload_failed;
|
|
|
|
extern atomic_t netfs_n_wh_write;
|
|
|
|
extern atomic_t netfs_n_wh_write_done;
|
|
|
|
extern atomic_t netfs_n_wh_write_failed;
|
2024-05-31 15:48:29 +01:00
|
|
|
extern atomic_t netfs_n_wb_lock_skip;
|
|
|
|
extern atomic_t netfs_n_wb_lock_wait;
|
2024-05-29 21:47:07 +01:00
|
|
|
extern atomic_t netfs_n_folioq;
|
2020-11-03 11:32:41 +00:00
|
|
|
|
2023-11-21 15:43:52 +00:00
|
|
|
int netfs_stats_show(struct seq_file *m, void *v);
|
2020-11-03 11:32:41 +00:00
|
|
|
|
|
|
|
/*
 * Bump the given statistics counter by one.  This is the variant used when
 * CONFIG_NETFS_STATS is defined.
 */
static inline void netfs_stat(atomic_t *stat)
{
	atomic_inc(stat);
}
|
|
|
|
|
|
|
|
/*
 * Drop the given statistics counter by one.  This is the variant used when
 * CONFIG_NETFS_STATS is defined.
 */
static inline void netfs_stat_d(atomic_t *stat)
{
	atomic_dec(stat);
}
|
|
|
|
|
|
|
|
#else
|
netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
/*
 * Fallbacks for the #else branch of the CONFIG_NETFS_STATS conditional: both
 * helpers expand to an empty statement, so the argument is never evaluated.
 */
#define netfs_stat(x) do {} while(0)
|
|
|
|
#define netfs_stat_d(x) do {} while(0)
|
2020-11-03 11:32:41 +00:00
|
|
|
#endif
|
netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
|
netfs: New writeback implementation
The current netfslib writeback implementation creates writeback requests of
contiguous folio data and then separately tiles subrequests over the space
twice, once for the server and once for the cache. This creates a few
issues:
(1) Every time there's a discontiguity or a change between writing to only
one destination or writing to both, it must create a new request.
This makes it harder to do vectored writes.
(2) The folios don't have the writeback mark removed until the end of the
request - and a request could be hundreds of megabytes.
(3) In future, I want to support a larger cache granularity, which will
require aggregation of some folios that contain unmodified data (which
only need to go to the cache) and some which contain modifications
(which need to be uploaded and stored to the cache) - but, currently,
these are treated as discontiguous.
There's also a move to get everyone to use writeback_iter() to extract
writable folios from the pagecache. That said, currently writeback_iter()
has some issues that make it less than ideal:
(1) there's no way to cancel the iteration, even if you find a "temporary"
error that means the current folio and all subsequent folios are going
to fail;
(2) there's no way to filter the folios being written back - something
that will impact Ceph with its ordered snap system;
(3) and if you get a folio you can't immediately deal with (say you need
to flush the preceding writes), you are left with a folio hanging in
the locked state for the duration, when really we should unlock it and
relock it later.
In this new implementation, I use writeback_iter() to pump folios,
progressively creating two parallel, but separate streams and cleaning up
the finished folios as the subrequests complete. Either or both streams
can contain gaps, and the subrequests in each stream can be of variable
size, don't need to align with each other and don't need to align with the
folios.
Indeed, subrequests can cross folio boundaries, may cover several folios or
a folio may be spanned by multiple subrequests, e.g.:
+---+---+-----+-----+---+----------+
Folios: | | | | | | |
+---+---+-----+-----+---+----------+
+------+------+ +----+----+
Upload: | | |.....| | |
+------+------+ +----+----+
+------+------+------+------+------+
Cache: | | | | | |
+------+------+------+------+------+
The progressive subrequest construction permits the algorithm to be
preparing both the next upload to the server and the next write to the
cache whilst the previous ones are already in progress. Throttling can be
applied to control the rate of production of subrequests - and, in any
case, we probably want to write them to the server in ascending order,
particularly if the file will be extended.
Content crypto can also be prepared at the same time as the subrequests and
run asynchronously, with the prepped requests being stalled until the
crypto catches up with them. This might also be useful for transport
crypto, but that happens at a lower layer, so probably would be harder to
pull off.
The algorithm is split into three parts:
(1) The issuer. This walks through the data, packaging it up, encrypting
it and creating subrequests. The part of this that generates
subrequests only deals with file positions and spans and so is usable
for DIO/unbuffered writes as well as buffered writes.
(2) The collector. This asynchronously collects completed subrequests,
unlocks folios, frees crypto buffers and performs any retries. This
runs in a work queue so that the issuer can return to the caller for
writeback (so that the VM can have its kswapd thread back) or async
writes.
(3) The retryer. This pauses the issuer, waits for all outstanding
subrequests to complete and then goes through the failed subrequests
to reissue them. This may involve reprepping them (with cifs, the
credits must be renegotiated, and a subrequest may need splitting),
and doing RMW for content crypto if there's a conflicting change on
the server.
[!] Note that some of the functions are prefixed with "new_" to avoid
clashes with existing functions. These will be renamed in a later patch
that cuts over to the new algorithm.
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
2024-03-18 16:52:05 +00:00
|
|
|
/*
|
|
|
|
* write_collect.c
|
|
|
|
*/
|
|
|
|
int netfs_folio_written_back(struct folio *folio);
|
|
|
|
void netfs_write_collection_worker(struct work_struct *work);
|
|
|
|
void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* write_issue.c
|
|
|
|
*/
|
|
|
|
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
|
|
|
struct file *file,
|
|
|
|
loff_t start,
|
|
|
|
enum netfs_io_origin origin);
|
|
|
|
void netfs_reissue_write(struct netfs_io_stream *stream,
|
2024-05-29 21:47:07 +01:00
|
|
|
struct netfs_io_subrequest *subreq,
|
|
|
|
struct iov_iter *source);
|
2024-07-02 00:40:22 +01:00
|
|
|
void netfs_issue_write(struct netfs_io_request *wreq,
|
|
|
|
struct netfs_io_stream *stream);
|
|
|
|
int netfs_advance_write(struct netfs_io_request *wreq,
|
|
|
|
struct netfs_io_stream *stream,
|
|
|
|
loff_t start, size_t len, bool to_eof);
|
2024-03-08 12:36:05 +00:00
|
|
|
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
|
|
|
|
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
|
|
|
|
struct folio *folio, size_t copied, bool to_page_end,
|
|
|
|
struct folio **writethrough_cache);
|
|
|
|
int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
|
|
|
|
struct folio *writethrough_cache);
|
netfs: New writeback implementation
The current netfslib writeback implementation creates writeback requests of
contiguous folio data and then separately tiles subrequests over the space
twice, once for the server and once for the cache. This creates a few
issues:
(1) Every time there's a discontiguity or a change between writing to only
one destination or writing to both, it must create a new request.
This makes it harder to do vectored writes.
(2) The folios don't have the writeback mark removed until the end of the
request - and a request could be hundreds of megabytes.
(3) In future, I want to support a larger cache granularity, which will
require aggregation of some folios that contain unmodified data (which
only need to go to the cache) and some which contain modifications
(which need to be uploaded and stored to the cache) - but, currently,
these are treated as discontiguous.
There's also a move to get everyone to use writeback_iter() to extract
writable folios from the pagecache. That said, currently writeback_iter()
has some issues that make it less than ideal:
(1) there's no way to cancel the iteration, even if you find a "temporary"
error that means the current folio and all subsequent folios are going
to fail;
(2) there's no way to filter the folios being written back - something
that will impact Ceph with its ordered snap system;
(3) and if you get a folio you can't immediately deal with (say you need
to flush the preceding writes), you are left with a folio hanging in
the locked state for the duration, when really we should unlock it and
relock it later.
In this new implementation, I use writeback_iter() to pump folios,
progressively creating two parallel, but separate streams and cleaning up
the finished folios as the subrequests complete. Either or both streams
can contain gaps, and the subrequests in each stream can be of variable
size, don't need to align with each other and don't need to align with the
folios.
Indeed, subrequests can cross folio boundaries, may cover several folios or
a folio may be spanned by multiple subrequests, e.g.:
+---+---+-----+-----+---+----------+
Folios: | | | | | | |
+---+---+-----+-----+---+----------+
+------+------+ +----+----+
Upload: | | |.....| | |
+------+------+ +----+----+
+------+------+------+------+------+
Cache: | | | | | |
+------+------+------+------+------+
The progressive subrequest construction permits the algorithm to be
preparing both the next upload to the server and the next write to the
cache whilst the previous ones are already in progress. Throttling can be
applied to control the rate of production of subrequests - and, in any
case, we probably want to write them to the server in ascending order,
particularly if the file will be extended.
Content crypto can also be prepared at the same time as the subrequests and
run asynchronously, with the prepped requests being stalled until the
crypto catches up with them. This might also be useful for transport
crypto, but that happens at a lower layer, so probably would be harder to
pull off.
The algorithm is split into three parts:
(1) The issuer. This walks through the data, packaging it up, encrypting
it and creating subrequests. The part of this that generates
subrequests only deals with file positions and spans and so is usable
for DIO/unbuffered writes as well as buffered writes.
(2) The collector. This asynchronously collects completed subrequests,
unlocks folios, frees crypto buffers and performs any retries. This
runs in a work queue so that the issuer can return to the caller for
writeback (so that the VM can have its kswapd thread back) or async
writes.
(3) The retryer. This pauses the issuer, waits for all outstanding
subrequests to complete and then goes through the failed subrequests
to reissue them. This may involve reprepping them (with cifs, the
credits must be renegotiated, and a subrequest may need splitting),
and doing RMW for content crypto if there's a conflicting change on
the server.
[!] Note that some of the functions are prefixed with "new_" to avoid
clashes with existing functions. These will be renamed in a later patch
that cuts over to the new algorithm.
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
2024-03-18 16:52:05 +00:00
|
|
|
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
|
|
|
|
|
2021-06-29 22:37:05 +01:00
|
|
|
/*
|
|
|
|
* Miscellaneous functions.
|
|
|
|
*/
|
netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context
While randstruct was satisfied with using an open-coded "void *" offset
cast for the netfs_i_context <-> inode casting, __builtin_object_size() as
used by FORTIFY_SOURCE was not as easily fooled. This was causing the
following complaint[1] from gcc v12:
In file included from include/linux/string.h:253,
from include/linux/ceph/ceph_debug.h:7,
from fs/ceph/inode.c:2:
In function 'fortify_memset_chk',
inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2,
inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2:
include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
242 | __write_overflow_field(p_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix this by embedding a struct inode into struct netfs_i_context (which
should perhaps be renamed to struct netfs_inode). The struct inode
vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode
structs and vfs_inode is then simply changed to "netfs.inode" in those
filesystems.
Further, rename netfs_i_context to netfs_inode, get rid of the
netfs_inode() function that converted a netfs_i_context pointer to an
inode pointer (that can now be done with &ctx->inode) and rename the
netfs_i_context() function to netfs_inode() (which is now a wrapper
around container_of()).
Most of the changes were done with:
perl -p -i -e 's/vfs_inode/netfs.inode/'g \
`git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]`
Kees suggested doing it with a pair structure[2] and a special
declarator to insert that into the network filesystem's inode
wrapper[3], but I think it's cleaner to embed it - and then it doesn't
matter if struct randomisation reorders things.
Dave Chinner suggested using a filesystem-specific VFS_I() function in
each filesystem to convert that filesystem's own inode wrapper struct
into the VFS inode struct[4].
Version #2:
- Fix a couple of missed name changes due to a disabled cifs option.
- Rename netfs_i_context to netfs_inode
- Use "netfs" instead of "nic" as the member name in per-fs inode wrapper
structs.
[ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily
disable '-Wattribute-warning' for now") that is no longer needed ]
Fixes: bc899ee1c898 ("netfs: Add a netfs inode context")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
cc: Jonathan Corbet <corbet@lwn.net>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <smfrench@gmail.com>
cc: William Kucharski <william.kucharski@oracle.com>
cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
cc: Dave Chinner <david@fromorbit.com>
cc: linux-doc@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: samba-technical@lists.samba.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1]
Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2]
Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3]
Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4]
Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-09 21:46:04 +01:00
|
|
|
static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
|
2021-06-29 22:37:05 +01:00
|
|
|
{
|
|
|
|
#if IS_ENABLED(CONFIG_FSCACHE)
|
|
|
|
struct fscache_cookie *cookie = ctx->cache;
|
|
|
|
|
|
|
|
return fscache_cookie_valid(cookie) && cookie->cache_priv &&
|
|
|
|
fscache_cookie_enabled(cookie);
|
|
|
|
#else
|
|
|
|
return false;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2023-09-29 17:28:25 +01:00
|
|
|
/*
|
|
|
|
* Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
|
|
|
|
*/
|
|
|
|
static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
|
|
|
|
{
|
netfs: Replace PG_fscache by setting folio->private and marking dirty
When dirty data is being written to the cache, setting/waiting on/clearing
the fscache flag is always done in tandem with setting/waiting on/clearing
the writeback flag. The netfslib buffered write routines wait on and set
both flags and the write request cleanup clears both flags, so the fscache
flag is almost superfluous.
The reason it isn't superfluous is because the fscache flag is also used to
indicate that data just read from the server is being written to the cache.
The flag is used to prevent a race involving overlapping direct-I/O writes
to the cache.
Change this to indicate that a page is in need of being copied to the cache
by placing a magic value in folio->private and marking the folios dirty.
Then when the writeback code sees a folio marked in this way, it only
writes it to the cache and not to the server.
If a folio that has this magic value set is modified, the value is just
replaced and the folio will then be uplodaded too.
With this, PG_fscache is no longer required by the netfslib core, 9p and
afs.
Ceph and nfs, however, still need to use the old PG_fscache-based tracking.
To deal with this, a flag, NETFS_ICTX_USE_PGPRIV2, now has to be set on the
flags in the netfs_inode struct for those filesystems. This reenables the
use of PG_fscache in that inode. 9p and afs use the netfslib write helpers
so get switched over; cifs, for the moment, does page-by-page manual access
to the cache, so doesn't use PG_fscache and is unaffected.
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: Bharath SM <bharathsm@microsoft.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
2024-03-19 10:00:09 +00:00
|
|
|
if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
|
2023-09-29 17:28:25 +01:00
|
|
|
refcount_inc(&netfs_group->ref);
|
|
|
|
return netfs_group;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
|
|
|
|
*/
|
|
|
|
static inline void netfs_put_group(struct netfs_group *netfs_group)
|
|
|
|
{
|
netfs: Replace PG_fscache by setting folio->private and marking dirty
When dirty data is being written to the cache, setting/waiting on/clearing
the fscache flag is always done in tandem with setting/waiting on/clearing
the writeback flag. The netfslib buffered write routines wait on and set
both flags and the write request cleanup clears both flags, so the fscache
flag is almost superfluous.
The reason it isn't superfluous is because the fscache flag is also used to
indicate that data just read from the server is being written to the cache.
The flag is used to prevent a race involving overlapping direct-I/O writes
to the cache.
Change this to indicate that a page is in need of being copied to the cache
by placing a magic value in folio->private and marking the folios dirty.
Then when the writeback code sees a folio marked in this way, it only
writes it to the cache and not to the server.
If a folio that has this magic value set is modified, the value is just
replaced and the folio will then be uplodaded too.
With this, PG_fscache is no longer required by the netfslib core, 9p and
afs.
Ceph and nfs, however, still need to use the old PG_fscache-based tracking.
To deal with this, a flag, NETFS_ICTX_USE_PGPRIV2, now has to be set on the
flags in the netfs_inode struct for those filesystems. This reenables the
use of PG_fscache in that inode. 9p and afs use the netfslib write helpers
so get switched over; cifs, for the moment, does page-by-page manual access
to the cache, so doesn't use PG_fscache and is unaffected.
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: Bharath SM <bharathsm@microsoft.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
2024-03-19 10:00:09 +00:00
|
|
|
if (netfs_group &&
|
|
|
|
netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
|
|
|
|
refcount_dec_and_test(&netfs_group->ref))
|
2023-09-29 17:28:25 +01:00
|
|
|
netfs_group->free(netfs_group);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
|
|
|
|
*/
|
|
|
|
static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
|
|
|
|
{
|
netfs: Replace PG_fscache by setting folio->private and marking dirty
When dirty data is being written to the cache, setting/waiting on/clearing
the fscache flag is always done in tandem with setting/waiting on/clearing
the writeback flag. The netfslib buffered write routines wait on and set
both flags and the write request cleanup clears both flags, so the fscache
flag is almost superfluous.
The reason it isn't superfluous is because the fscache flag is also used to
indicate that data just read from the server is being written to the cache.
The flag is used to prevent a race involving overlapping direct-I/O writes
to the cache.
Change this to indicate that a page is in need of being copied to the cache
by placing a magic value in folio->private and marking the folios dirty.
Then when the writeback code sees a folio marked in this way, it only
writes it to the cache and not to the server.
If a folio that has this magic value set is modified, the value is just
replaced and the folio will then be uplodaded too.
With this, PG_fscache is no longer required by the netfslib core, 9p and
afs.
Ceph and nfs, however, still need to use the old PG_fscache-based tracking.
To deal with this, a flag, NETFS_ICTX_USE_PGPRIV2, now has to be set on the
flags in the netfs_inode struct for those filesystems. This reenables the
use of PG_fscache in that inode. 9p and afs use the netfslib write helpers
so get switched over; cifs, for the moment, does page-by-page manual access
to the cache, so doesn't use PG_fscache and is unaffected.
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: Bharath SM <bharathsm@microsoft.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
2024-03-19 10:00:09 +00:00
|
|
|
if (netfs_group &&
|
|
|
|
netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
|
|
|
|
refcount_sub_and_test(nr, &netfs_group->ref))
|
2023-09-29 17:28:25 +01:00
|
|
|
netfs_group->free(netfs_group);
|
|
|
|
}
|
|
|
|
|
2023-11-20 15:55:18 +00:00
|
|
|
/*
|
|
|
|
* fscache-cache.c
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
extern const struct seq_operations fscache_caches_seq_ops;
|
|
|
|
#endif
|
|
|
|
bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
|
|
|
|
void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
|
|
|
|
struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
|
|
|
|
void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
|
|
|
|
|
|
|
|
static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
|
|
|
|
{
|
|
|
|
return smp_load_acquire(&cache->state);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
|
|
|
|
{
|
|
|
|
return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void fscache_set_cache_state(struct fscache_cache *cache,
|
|
|
|
enum fscache_cache_state new_state)
|
|
|
|
{
|
|
|
|
smp_store_release(&cache->state, new_state);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
|
|
|
|
enum fscache_cache_state old_state,
|
|
|
|
enum fscache_cache_state new_state)
|
|
|
|
{
|
|
|
|
return try_cmpxchg_release(&cache->state, &old_state, new_state);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fscache-cookie.c
|
|
|
|
*/
|
|
|
|
extern struct kmem_cache *fscache_cookie_jar;
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
extern const struct seq_operations fscache_cookies_seq_ops;
|
|
|
|
#endif
|
|
|
|
extern struct timer_list fscache_cookie_lru_timer;
|
|
|
|
|
|
|
|
extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
|
|
|
|
extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
|
|
|
|
enum fscache_access_trace why);
|
|
|
|
|
|
|
|
static inline void fscache_see_cookie(struct fscache_cookie *cookie,
|
|
|
|
enum fscache_cookie_trace where)
|
|
|
|
{
|
|
|
|
trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
|
|
|
|
where);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fscache-main.c
|
|
|
|
*/
|
|
|
|
extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
|
2023-11-21 15:43:52 +00:00
|
|
|
/*
 * Module init/exit hooks for fscache.  When CONFIG_FSCACHE is not set these
 * collapse to no-op stubs, with fscache_init() reporting success (0).
 */
#if defined(CONFIG_FSCACHE)
int __init fscache_init(void);
void __exit fscache_exit(void);
#else
static inline int fscache_init(void)
{
	return 0;
}

static inline void fscache_exit(void)
{
}
#endif
|
2023-11-20 15:55:18 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* fscache-proc.c
|
|
|
|
*/
|
|
|
|
/*
 * Procfs setup/teardown for fscache.  Without CONFIG_PROC_FS,
 * fscache_proc_init() evaluates to 0 and fscache_proc_cleanup() does nothing.
 */
#if defined(CONFIG_PROC_FS)
extern int __init fscache_proc_init(void);
extern void fscache_proc_cleanup(void);
#else
#define fscache_proc_init()	(0)
#define fscache_proc_cleanup()	do {} while (0)
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fscache-stats.c
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_FSCACHE_STATS
|
|
|
|
extern atomic_t fscache_n_volumes;
|
|
|
|
extern atomic_t fscache_n_volumes_collision;
|
|
|
|
extern atomic_t fscache_n_volumes_nomem;
|
|
|
|
extern atomic_t fscache_n_cookies;
|
|
|
|
extern atomic_t fscache_n_cookies_lru;
|
|
|
|
extern atomic_t fscache_n_cookies_lru_expired;
|
|
|
|
extern atomic_t fscache_n_cookies_lru_removed;
|
|
|
|
extern atomic_t fscache_n_cookies_lru_dropped;
|
|
|
|
|
|
|
|
extern atomic_t fscache_n_acquires;
|
|
|
|
extern atomic_t fscache_n_acquires_ok;
|
|
|
|
extern atomic_t fscache_n_acquires_oom;
|
|
|
|
|
|
|
|
extern atomic_t fscache_n_invalidates;
|
|
|
|
|
|
|
|
extern atomic_t fscache_n_relinquishes;
|
|
|
|
extern atomic_t fscache_n_relinquishes_retire;
|
|
|
|
extern atomic_t fscache_n_relinquishes_dropped;
|
|
|
|
|
|
|
|
extern atomic_t fscache_n_resizes;
|
|
|
|
extern atomic_t fscache_n_resizes_null;
|
|
|
|
|
|
|
|
/*
 * Bump a statistics counter by one.
 */
static inline void fscache_stat(atomic_t *stat)
{
	atomic_inc(stat);
}
|
|
|
|
|
|
|
|
/*
 * Reduce a statistics counter by one.
 */
static inline void fscache_stat_d(atomic_t *stat)
{
	atomic_dec(stat);
}
|
|
|
|
|
|
|
|
#define __fscache_stat(stat) (stat)
|
|
|
|
|
2023-11-21 15:43:52 +00:00
|
|
|
int fscache_stats_show(struct seq_file *m);
|
2023-11-20 15:55:18 +00:00
|
|
|
#else
|
|
|
|
|
|
|
|
#define __fscache_stat(stat) (NULL)
|
|
|
|
#define fscache_stat(stat) do {} while (0)
|
|
|
|
#define fscache_stat_d(stat) do {} while (0)
|
2023-11-21 15:43:52 +00:00
|
|
|
|
|
|
|
static inline int fscache_stats_show(struct seq_file *m) { return 0; }
|
2023-11-20 15:55:18 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* fscache-volume.c
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
extern const struct seq_operations fscache_volumes_seq_ops;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
|
|
|
|
enum fscache_volume_trace where);
|
|
|
|
bool fscache_begin_volume_access(struct fscache_volume *volume,
|
|
|
|
struct fscache_cookie *cookie,
|
|
|
|
enum fscache_access_trace why);
|
|
|
|
void fscache_create_volume(struct fscache_volume *volume, bool wait);
|
|
|
|
|
netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
/*****************************************************************************/
|
|
|
|
/*
|
|
|
|
* debug tracing
|
|
|
|
*/
|
|
|
|
/*
 * Print a debug line through printk(), prefixed with the current task's comm
 * name in a 6-character field and terminated with a newline.
 */
#define dbgprintk(FMT, ...)						\
	printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
|
netfs: Provide readahead and readpage netfs helpers
Add a pair of helper functions:
(*) netfs_readahead()
(*) netfs_readpage()
to do the work of handling a readahead or a readpage, where the page(s)
that form part of the request may be split between the local cache, the
server or just require clearing, and may be single pages and transparent
huge pages. This is all handled within the helper.
Note that while both will read from the cache if there is data present,
only netfs_readahead() will expand the request beyond what it was asked to
do, and only netfs_readahead() will write back to the cache.
netfs_readpage(), on the other hand, is synchronous and only fetches the
page (which might be a THP) it is asked for.
The netfs gives the helper parameters from the VM, the cache cookie it
wants to use (or NULL) and a table of operations (only one of which is
mandatory):
(*) expand_readahead() [optional]
Called to allow the netfs to request an expansion of a readahead
request to meet its own alignment requirements. This is done by
changing rreq->start and rreq->len.
(*) clamp_length() [optional]
Called to allow the netfs to cut down a subrequest to meet its own
boundary requirements. If it does this, the helper will generate
additional subrequests until the full request is satisfied.
(*) is_still_valid() [optional]
Called to find out if the data just read from the cache has been
invalidated and must be reread from the server.
(*) issue_op() [required]
Called to ask the netfs to issue a read to the server. The subrequest
describes the read. The read request holds information about the file
being accessed.
The netfs can cache information in rreq->netfs_priv.
Upon completion, the netfs should set the error, transferred and can
also set FSCACHE_SREQ_CLEAR_TAIL and then call
fscache_subreq_terminated().
(*) done() [optional]
Called after the pages have been unlocked. The read request is still
pinning the file and mapping and may still be pinning pages with
PG_fscache. rreq->error indicates any error that has been
accumulated.
(*) cleanup() [optional]
Called when the helper is disposing of a finished read request. This
allows the netfs to clear rreq->netfs_priv.
Netfs support is enabled with CONFIG_NETFS_SUPPORT=y. It will be built
even if CONFIG_FSCACHE=n and in this case much of it should be optimised
away, allowing the filesystem to use it even when caching is disabled.
Changes:
v5:
- Comment why netfs_readahead() is putting pages[2].
- Use page_file_mapping() rather than page->mapping[2].
- Use page_index() rather than page->index[2].
- Use set_page_fscache()[3] rather than SetPageFsCache() as this takes an
appropriate ref too[4].
v4:
- Folded in a kerneldoc comment fix.
- Folded in a fix for the error handling in the case that ENOMEM occurs.
- Added flag to netfs_subreq_terminated() to indicate that the caller may
have been running async and stuff that might sleep needs punting to a
workqueue (can't use in_softirq()[1]).
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-By: Marc Dionne <marc.dionne@auristor.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20210216084230.GA23669@lst.de/ [1]
Link: https://lore.kernel.org/r/20210321014202.GF3420@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/2499407.1616505440@warthog.procyon.org.uk/ [3]
Link: https://lore.kernel.org/r/CAHk-=wh+2gbF7XEjYc=HV9w_2uVzVf7vs60BPz0gFA=+pUm3ww@mail.gmail.com/ [4]
Link: https://lore.kernel.org/r/160588497406.3465195.18003475695899726222.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118136849.1232039.8923686136144228724.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161032290.2537118.13400578415247339173.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340394873.1303470.6237319335883242536.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539537375.286939.16642940088716990995.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653795430.2770958.4947584573720000554.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789076581.6155.6745849361504760209.stgit@warthog.procyon.org.uk/ # v6
2020-05-13 17:41:20 +01:00
|
|
|
|
|
|
|
/*
 * Function-entry trace: hands dbgprintk() the text "==> <function>(<args>)",
 * where <function> is __func__ at the point of use and <args> is produced
 * from the caller-supplied format and arguments.
 */
#define kenter(fmt, ...) \
	dbgprintk("==> %s(" fmt ")", __func__, ##__VA_ARGS__)
|
|
|
|
/*
 * Function-exit trace: hands dbgprintk() the text "<== <function>()" followed
 * directly by whatever the caller-supplied format and arguments produce
 * (typically a return value).  <function> is __func__ at the point of use.
 */
#define kleave(fmt, ...) \
	dbgprintk("<== %s()" fmt "", __func__, ##__VA_ARGS__)
|
|
|
|
/*
 * General debug message: passes the format and arguments straight through to
 * dbgprintk() without adding any prefix.
 */
#define kdebug(fmt, ...) \
	dbgprintk(fmt, ##__VA_ARGS__)
|
|
|
|
|
2024-07-18 21:07:32 +01:00
|
|
|
/*
 * _enter() / _leave() / _debug(): switchable wrappers around kenter(),
 * kleave() and kdebug().  Which definition is used depends on the build:
 *
 *  - __KDEBUG defined: always forward to the k*() macro.
 *  - CONFIG_NETFS_DEBUG defined: forward only while netfs_debug is non-zero.
 *  - otherwise: pass the same format string and arguments to no_printk()
 *    instead (NOTE(review): presumably so the arguments are still checked
 *    without producing output - confirm against no_printk()).
 */
#ifdef __KDEBUG
#define _enter(fmt, ...)	kenter(fmt, ##__VA_ARGS__)
#define _leave(fmt, ...)	kleave(fmt, ##__VA_ARGS__)
#define _debug(fmt, ...)	kdebug(fmt, ##__VA_ARGS__)

#elif defined(CONFIG_NETFS_DEBUG)
#define _enter(fmt, ...)				\
do {							\
	if (netfs_debug) {				\
		kenter(fmt, ##__VA_ARGS__);		\
	}						\
} while (0)

#define _leave(fmt, ...)				\
do {							\
	if (netfs_debug) {				\
		kleave(fmt, ##__VA_ARGS__);		\
	}						\
} while (0)

#define _debug(fmt, ...)				\
do {							\
	if (netfs_debug) {				\
		kdebug(fmt, ##__VA_ARGS__);		\
	}						\
} while (0)

#else
#define _enter(fmt, ...) \
	no_printk("==> %s(" fmt ")", __func__, ##__VA_ARGS__)
#define _leave(fmt, ...) \
	no_printk("<== %s()" fmt "", __func__, ##__VA_ARGS__)
#define _debug(fmt, ...) \
	no_printk(fmt, ##__VA_ARGS__)
#endif
|
|
|
|
|
2023-11-20 15:55:18 +00:00
|
|
|
/*
 * Assertion macros.
 *
 * When a check fails, each macro prints a blank line and "Assertion failed"
 * through pr_err() and then calls BUG().  The comparison variants also print
 * both operands in hex (cast to unsigned long) together with the operator.
 *
 * The "#if 1" keeps the checking versions permanently selected; the #else
 * branch holds the empty replacements.
 */
#if 1 /* defined(__KDEBUGALL) */

/* Require that @expr evaluates to true. */
#define ASSERT(expr)							\
do {									\
	if (unlikely(!(expr))) {					\
		pr_err("\n");						\
		pr_err("Assertion failed\n");				\
		BUG();							\
	}								\
} while (0)

/*
 * Require that "@lhs @op @rhs" holds.  On failure the operands are expanded
 * a second time in order to print them, so they should be free of side
 * effects.
 */
#define ASSERTCMP(lhs, op, rhs)						\
do {									\
	if (unlikely(!((lhs) op (rhs)))) {				\
		pr_err("\n");						\
		pr_err("Assertion failed\n");				\
		pr_err("%lx " #op " %lx is false\n",			\
		       (unsigned long)(lhs), (unsigned long)(rhs));	\
		BUG();							\
	}								\
} while (0)

/* Require that @expr is true, but only when @cond is true. */
#define ASSERTIF(cond, expr)						\
do {									\
	if (unlikely((cond) && !(expr))) {				\
		pr_err("\n");						\
		pr_err("Assertion failed\n");				\
		BUG();							\
	}								\
} while (0)

/*
 * Require that "@lhs @op @rhs" holds, but only when @cond is true.  As with
 * ASSERTCMP(), the operands are expanded again on the failure path.
 */
#define ASSERTIFCMP(cond, lhs, op, rhs)					\
do {									\
	if (unlikely((cond) && !((lhs) op (rhs)))) {			\
		pr_err("\n");						\
		pr_err("Assertion failed\n");				\
		pr_err("%lx " #op " %lx is false\n",			\
		       (unsigned long)(lhs), (unsigned long)(rhs));	\
		BUG();							\
	}								\
} while (0)

#else

/* Checking disabled: every assertion expands to an empty statement. */
#define ASSERT(expr)			do {} while (0)
#define ASSERTCMP(lhs, op, rhs)		do {} while (0)
#define ASSERTIF(cond, expr)		do {} while (0)
#define ASSERTIFCMP(cond, lhs, op, rhs)	do {} while (0)

#endif /* assert or not */
|