linux-stable/include/linux/nfs_page.h
Dave Wysochanski 000dbe0bec NFS: Convert buffered read paths to use netfs when fscache is enabled
Convert the NFS buffered read code paths to corresponding netfs APIs,
but only when fscache is configured and enabled.

The netfs API defines struct netfs_request_ops which must be filled
in by the network filesystem.  For NFS, we only need to define 5 of
the functions, the main one being the issue_read() function.
The issue_read() function is called by the netfs layer when a read
cannot be fulfilled locally, and must be sent to the server (either
the cache is not active, or it is active but the data is not available).
Once the read from the server is complete, netfs requires a call to
netfs_subreq_terminated() which conveys either how many bytes were read
successfully, or an error.  Note that issue_read() is called with a
structure, netfs_io_subrequest, which defines the IO requested, and
contains a start and a length (both in bytes), and assumes the underlying
netfs will return a either an error on the whole region, or the number
of bytes successfully read.

The NFS IO path is page based and the main APIs are the pgio APIs defined
in pagelist.c.  For the pgio APIs, there is no way for the caller to
know how many RPCs will be sent and how the pages will be broken up
into underlying RPCs, each of which will have their own completion and
return code.  In contrast, netfs is subrequest based, a single
subrequest may contain multiple pages, and a single subrequest is
initiated with issue_read() and terminated with netfs_subreq_terminated().
Thus, to utilze the netfs APIs, NFS needs some way to accommodate
the netfs API requirement on the single response to the whole
subrequest, while also minimizing disruptive changes to the NFS
pgio layer.

The approach taken with this patch is to allocate a small structure
for each nfs_netfs_issue_read() call, store the final error and number
of bytes successfully transferred in the structure, and update these values
as each RPC completes.  The refcount on the structure is used as a marker
for the last RPC completion, is incremented in nfs_netfs_read_initiate(),
and decremented inside nfs_netfs_read_completion(), when a nfs_pgio_header
contains a valid pointer to the data.  On the final put (which signals
the final outstanding RPC is complete) in nfs_netfs_read_completion(),
call netfs_subreq_terminated() with either the final error value (if
one or more READs complete with an error) or the number of bytes
successfully transferred (if all RPCs complete successfully).  Note
that when all RPCs complete successfully, the number of bytes transferred
is capped to the length of the subrequest.  Capping the transferred length
to the subrequest length prevents "Subreq overread" warnings from netfs.
This is due to the "aligned_len" in nfs_pageio_add_page(), and the
corner case where NFS requests a full page at the end of the file,
even when i_size reflects only a partial page (NFS overread).

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Tested-by: Daire Byrne <daire@dneg.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
2023-04-11 13:08:26 -04:00

289 lines
8.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* linux/include/linux/nfs_page.h
*
* Copyright (C) 2000 Trond Myklebust
*
* NFS page cache wrapper.
*/
#ifndef _LINUX_NFS_PAGE_H
#define _LINUX_NFS_PAGE_H
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/wait.h>
#include <linux/sunrpc/auth.h>
#include <linux/nfs_xdr.h>
#include <linux/kref.h>
/*
* Valid flags for a dirty buffer
*/
enum {
PG_BUSY = 0, /* nfs_{un}lock_request */
PG_MAPPED, /* page private set for buffered io */
PG_FOLIO, /* Tracking a folio (unset for O_DIRECT) */
PG_CLEAN, /* write succeeded */
PG_COMMIT_TO_DS, /* used by pnfs layouts */
PG_INODE_REF, /* extra ref held by inode when in writeback */
PG_HEADLOCK, /* page group lock of wb_head */
PG_TEARDOWN, /* page group sync for destroy */
PG_UNLOCKPAGE, /* page group sync bit in read path */
PG_UPTODATE, /* page group sync bit in read path */
PG_WB_END, /* page group sync bit in write path */
PG_REMOVE, /* page group sync bit in write path */
PG_CONTENDED1, /* Is someone waiting for a lock? */
PG_CONTENDED2, /* Is someone waiting for a lock? */
};
struct nfs_inode;
struct nfs_page {
struct list_head wb_list; /* Defines state of page: */
union {
struct page *wb_page; /* page to read in/write out */
struct folio *wb_folio;
};
struct nfs_lock_context *wb_lock_context; /* lock context info */
pgoff_t wb_index; /* Offset >> PAGE_SHIFT */
unsigned int wb_offset, /* Offset & ~PAGE_MASK */
wb_pgbase, /* Start of page data */
wb_bytes; /* Length of request */
struct kref wb_kref; /* reference count */
unsigned long wb_flags;
struct nfs_write_verifier wb_verf; /* Commit cookie */
struct nfs_page *wb_this_page; /* list of reqs for this page */
struct nfs_page *wb_head; /* head pointer for req list */
unsigned short wb_nio; /* Number of I/O attempts */
};
struct nfs_pgio_mirror;
struct nfs_pageio_descriptor;
struct nfs_pageio_ops {
void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
struct nfs_page *);
int (*pg_doio)(struct nfs_pageio_descriptor *);
unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *,
struct nfs_page *);
void (*pg_cleanup)(struct nfs_pageio_descriptor *);
struct nfs_pgio_mirror *
(*pg_get_mirror)(struct nfs_pageio_descriptor *, u32);
u32 (*pg_set_mirror)(struct nfs_pageio_descriptor *, u32);
};
struct nfs_rw_ops {
struct nfs_pgio_header *(*rw_alloc_header)(void);
void (*rw_free_header)(struct nfs_pgio_header *);
int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
struct inode *);
void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *,
const struct nfs_rpc_ops *,
struct rpc_task_setup *, int);
};
struct nfs_pgio_mirror {
struct list_head pg_list;
unsigned long pg_bytes_written;
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
unsigned char pg_recoalesce : 1;
};
struct nfs_pageio_descriptor {
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
const struct nfs_rw_ops *pg_rw_ops;
int pg_ioflags;
int pg_error;
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
struct nfs_io_completion *pg_io_completion;
struct nfs_direct_req *pg_dreq;
#ifdef CONFIG_NFS_FSCACHE
void *pg_netfs;
#endif
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;
struct nfs_pgio_mirror *pg_mirrors;
struct nfs_pgio_mirror pg_mirrors_static[1];
struct nfs_pgio_mirror *pg_mirrors_dynamic;
u32 pg_mirror_idx; /* current mirror */
unsigned short pg_maxretrans;
unsigned char pg_moreio : 1;
};
/* arbitrarily selected limit to number of mirrors */
#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx,
struct page *page,
unsigned int pgbase,
loff_t offset,
unsigned int count);
extern struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
struct folio *folio,
unsigned int offset,
unsigned int count);
extern void nfs_release_request(struct nfs_page *);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
const struct nfs_pgio_completion_ops *compl_ops,
const struct nfs_rw_ops *rw_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *);
extern int nfs_pageio_resend(struct nfs_pageio_descriptor *,
struct nfs_pgio_header *);
extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
struct nfs_page *prev,
struct nfs_page *req);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *);
extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
extern int nfs_page_group_lock_subrequests(struct nfs_page *head);
extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
extern int nfs_page_group_lock(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
extern int nfs_page_set_headlock(struct nfs_page *req);
extern void nfs_page_clear_headlock(struct nfs_page *req);
extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
/**
* nfs_page_to_folio - Retrieve a struct folio for the request
* @req: pointer to a struct nfs_page
*
* If a folio was assigned to @req, then return it, otherwise return NULL.
*/
static inline struct folio *nfs_page_to_folio(const struct nfs_page *req)
{
if (test_bit(PG_FOLIO, &req->wb_flags))
return req->wb_folio;
return NULL;
}
/**
* nfs_page_to_page - Retrieve a struct page for the request
* @req: pointer to a struct nfs_page
* @pgbase: folio byte offset
*
* Return the page containing the byte that is at offset @pgbase relative
* to the start of the folio.
* Note: The request starts at offset @req->wb_pgbase.
*/
static inline struct page *nfs_page_to_page(const struct nfs_page *req,
size_t pgbase)
{
struct folio *folio = nfs_page_to_folio(req);
if (folio == NULL)
return req->wb_page;
return folio_page(folio, pgbase >> PAGE_SHIFT);
}
/**
* nfs_page_to_inode - Retrieve an inode for the request
* @req: pointer to a struct nfs_page
*/
static inline struct inode *nfs_page_to_inode(const struct nfs_page *req)
{
struct folio *folio = nfs_page_to_folio(req);
if (folio == NULL)
return page_file_mapping(req->wb_page)->host;
return folio_file_mapping(folio)->host;
}
/**
* nfs_page_max_length - Retrieve the maximum possible length for a request
* @req: pointer to a struct nfs_page
*
* Returns the maximum possible length of a request
*/
static inline size_t nfs_page_max_length(const struct nfs_page *req)
{
struct folio *folio = nfs_page_to_folio(req);
if (folio == NULL)
return PAGE_SIZE;
return folio_size(folio);
}
/*
* Lock the page of an asynchronous request
*/
static inline int
nfs_lock_request(struct nfs_page *req)
{
return !test_and_set_bit(PG_BUSY, &req->wb_flags);
}
/**
* nfs_list_add_request - Insert a request into a list
* @req: request
* @head: head of list into which to insert the request.
*/
static inline void
nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
list_add_tail(&req->wb_list, head);
}
/**
* nfs_list_move_request - Move a request to a new list
* @req: request
* @head: head of list into which to insert the request.
*/
static inline void
nfs_list_move_request(struct nfs_page *req, struct list_head *head)
{
list_move_tail(&req->wb_list, head);
}
/**
* nfs_list_remove_request - Remove a request from its wb_list
* @req: request
*/
static inline void
nfs_list_remove_request(struct nfs_page *req)
{
if (list_empty(&req->wb_list))
return;
list_del_init(&req->wb_list);
}
static inline struct nfs_page *
nfs_list_entry(struct list_head *head)
{
return list_entry(head, struct nfs_page, wb_list);
}
static inline loff_t req_offset(const struct nfs_page *req)
{
return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
}
static inline struct nfs_open_context *
nfs_req_openctx(struct nfs_page *req)
{
return req->wb_lock_context->open_context;
}
#endif /* _LINUX_NFS_PAGE_H */