From e5decb2eb5f4d1f64ba9196b4bad0e26a441c81c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Nov 2020 14:47:14 -0500 Subject: [PATCH 001/131] svcrdma: Catch another Reply chunk overflow case When space in the Reply chunk runs out in the middle of a segment, we end up passing a zero-length SGL to rdma_rw_ctx_init(), and it oopses. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 80a0c0e87590..7c50eddb8d3c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -460,6 +460,8 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, offset += info->wi_seg_off; write_len = min(remaining, length - info->wi_seg_off); + if (!write_len) + goto out_overflow; ctxt = svc_rdma_get_rw_ctxt(rdma, (write_len >> PAGE_SHIFT) + 2); if (!ctxt) From 5a7e702670adc368caa1b64c2138956d8cba0d4f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:09 -0400 Subject: [PATCH 002/131] SUNRPC: Adjust synopsis of xdr_buf_subsegment() Clean up: This enables xdr_buf_subsegment()'s callers to pass in a const pointer to that buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 ++- net/sunrpc/xdr.c | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..ec2a22ccdc2a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -183,7 +183,8 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) */ extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, + unsigned int base, unsigned int len); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 71e03b930b70..28f81769a27c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1379,9 +1379,8 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov); * * Returns -1 if base of length are out of bounds. */ -int -xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, - unsigned int base, unsigned int len) +int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, + unsigned int base, unsigned int len) { subbuf->buflen = subbuf->len = len; if (base < buf->head[0].iov_len) { From 51bad8cc1301f14ebf6840a6d8098520553ed5d5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: [PATCH 003/131] svcrdma: Const-ify the xdr_buf arguments Clean up: Ensure the code in rw.c does not modify the argument, and enable callers to also use "const struct xdr_buf *". Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 7c50eddb8d3c..aeed4e200949 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -197,7 +197,7 @@ struct svc_rdma_write_info { __be32 *wi_segs; /* SGL constructor arguments */ - struct xdr_buf *wi_xdr; + const struct xdr_buf *wi_xdr; unsigned char *wi_base; unsigned int wi_next_off; @@ -405,7 +405,7 @@ static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info, struct svc_rdma_rw_ctxt *ctxt) { unsigned int sge_no, sge_bytes, page_off, page_no; - struct xdr_buf *xdr = info->wi_xdr; + const struct xdr_buf *xdr = info->wi_xdr; struct scatterlist *sg; struct page **page; From ab1394ee7a110190c2ea38e1f1b72737daa10f20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: [PATCH 004/131] svcrdma: Refactor the RDMA Write path Refactor for subsequent changes. Constify the xdr_buf argument to ensure the code here does not modify it, and to enable callers to pass in a "const struct xdr_buf *". At the same time, rename the helper functions, which emit RDMA Writes, not RDMA Sends, and add documenting comments. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 56 +++++++++++++++++++------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index aeed4e200949..a75f8a133bef 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -495,27 +495,42 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return -E2BIG; } -/* Send one of an xdr_buf's kvecs by itself. To send a Reply - * chunk, the whole RPC Reply is written back to the client. - * This function writes either the head or tail of the xdr_buf - * containing the Reply. +/** + * svc_rdma_iov_write - Construct RDMA Writes from an iov + * @info: pointer to write arguments + * @iov: kvec to write + * + * Returns: + * On succes, returns zero + * %-E2BIG if the client-provided Write chunk is too small + * %-ENOMEM if a resource has been exhausted + * %-EIO if an rdma-rw error occurred */ -static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info, - struct kvec *vec) +static int svc_rdma_iov_write(struct svc_rdma_write_info *info, + const struct kvec *iov) { - info->wi_base = vec->iov_base; + info->wi_base = iov->iov_base; return svc_rdma_build_writes(info, svc_rdma_vec_to_sg, - vec->iov_len); + iov->iov_len); } -/* Send an xdr_buf's page list by itself. A Write chunk is just - * the page list. A Reply chunk is @xdr's head, page list, and - * tail. This function is shared between the two types of chunk. +/** + * svc_rdma_pages_write - Construct RDMA Writes from pages + * @info: pointer to write arguments + * @xdr: xdr_buf with pages to write + * @offset: offset into the content of @xdr + * @length: number of bytes to write + * + * Returns: + * On succes, returns zero + * %-E2BIG if the client-provided Write chunk is too small + * %-ENOMEM if a resource has been exhausted + * %-EIO if an rdma-rw error occurred */ -static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, - struct xdr_buf *xdr, - unsigned int offset, - unsigned long length) +static int svc_rdma_pages_write(struct svc_rdma_write_info *info, + const struct xdr_buf *xdr, + unsigned int offset, + unsigned long length) { info->wi_xdr = xdr; info->wi_next_off = offset - xdr->head[0].iov_len; @@ -552,7 +567,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, if (!info) return -ENOMEM; - ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length); + ret = svc_rdma_pages_write(info, xdr, offset, length); if (ret < 0) goto out_err; @@ -592,7 +607,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, if (!info) return -ENOMEM; - ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]); + ret = svc_rdma_iov_write(info, &xdr->head[0]); if (ret < 0) goto out_err; consumed = xdr->head[0].iov_len; @@ -601,16 +616,15 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, * client did not provide Write chunks. */ if (!rctxt->rc_write_list && xdr->page_len) { - ret = svc_rdma_send_xdr_pagelist(info, xdr, - xdr->head[0].iov_len, - xdr->page_len); + ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len, + xdr->page_len); if (ret < 0) goto out_err; consumed += xdr->page_len; } if (xdr->tail[0].iov_len) { - ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]); + ret = svc_rdma_iov_write(info, &xdr->tail[0]); if (ret < 0) goto out_err; consumed += xdr->tail[0].iov_len; From 03493bca084fdca48abc59b00e06ce733aa9eb7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 10 Jun 2020 10:36:42 -0400 Subject: [PATCH 005/131] SUNRPC: Rename svc_encode_read_payload() Clean up: "result payload" is a less confusing name for these payloads. "READ payload" reflects only the NFS usage. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- include/linux/sunrpc/svc.h | 6 +++--- include/linux/sunrpc/svc_rdma.h | 4 ++-- include/linux/sunrpc/svc_xprt.h | 4 ++-- net/sunrpc/svc.c | 11 ++++++----- net/sunrpc/svcsock.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 8 files changed, 23 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 833a2c64dfe8..7e24fb3ca36e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3829,7 +3829,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_length = maxcount; if (nfserr) return nfserr; - if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount)) + if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount)) return nfserr_io; xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 386628b36bc7..c220b734fa69 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -519,9 +519,9 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); -int svc_encode_read_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); +int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, struct kvec *first, size_t total); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9dc3a3b88391..2b870a3f391b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -207,8 +207,8 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status); extern int svc_rdma_sendto(struct svc_rqst *); -extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern struct svc_xprt_class svc_rdma_class; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index aca35ab5cff2..92455e0d5244 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,8 +21,8 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - int (*xpo_read_payload)(struct svc_rqst *, unsigned int, - unsigned int); + int (*xpo_result_payload)(struct svc_rqst *, unsigned int, + unsigned int); void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c211b607239e..b41500645c3f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1622,7 +1622,7 @@ u32 svc_max_payload(const struct svc_rqst *rqstp) EXPORT_SYMBOL_GPL(svc_max_payload); /** - * svc_encode_read_payload - mark a range of bytes as a READ payload + * svc_encode_result_payload - mark a range of bytes as a result payload * @rqstp: svc_rqst to operate on * @offset: payload's byte offset in rqstp->rq_res * @length: size of payload, in bytes @@ -1630,12 +1630,13 @@ EXPORT_SYMBOL_GPL(svc_max_payload); * Returns zero on success, or a negative errno if a permanent * error occurred. */ -int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { - return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length); + return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset, + length); } -EXPORT_SYMBOL_GPL(svc_encode_read_payload); +EXPORT_SYMBOL_GPL(svc_encode_result_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c2752e2b9ce3..b248f2349437 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -181,8 +181,8 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) } } -static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { return 0; } @@ -635,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, - .xpo_read_payload = svc_sock_read_payload, + .xpo_result_payload = svc_sock_result_payload, .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, @@ -1123,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, - .xpo_read_payload = svc_sock_read_payload, + .xpo_result_payload = svc_sock_result_payload, .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c3d588b149aa..c8411b4f3492 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -979,19 +979,19 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) } /** - * svc_rdma_read_payload - special processing for a READ payload + * svc_rdma_result_payload - special processing for a result payload * @rqstp: svc_rqst to operate on * @offset: payload's byte offset in @xdr * @length: size of payload, in bytes * * Returns zero on success. * - * For the moment, just record the xdr_buf location of the READ + * For the moment, just record the xdr_buf location of the result * payload. svc_rdma_sendto will use that location later when * we actually send the payload. */ -int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fb044792b571..afba4e9d5425 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, - .xpo_read_payload = svc_rdma_read_payload, + .xpo_result_payload = svc_rdma_result_payload, .xpo_release_rqst = svc_rdma_release_rqst, .xpo_detach = svc_rdma_detach, .xpo_free = svc_rdma_free, From 76e5492b161f555c0fb69cad9eb39a7d8467f5fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 5 Nov 2020 10:24:19 -0500 Subject: [PATCH 006/131] NFSD: Invoke svc_encode_result_payload() in "read" NFSD encoders Have the NFSD encoders annotate the boundaries of every direct-data-placement eligible result data payload. Then change svcrdma to use that annotation instead of the xdr->page_len when handling Write chunks. For NFSv4 on RDMA, that enables the ability to recognize multiple result payloads per compound. This is a pre-requisite for supporting multiple Write chunks per RPC transaction. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 7 +++++ fs/nfsd/nfs4xdr.c | 41 +++++++++++++++++++-------- fs/nfsd/nfsxdr.c | 6 ++++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 24 +++++----------- 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2277f83da250..186b07a72373 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -707,6 +707,7 @@ int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readlinkres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; *p++ = resp->status; p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -720,6 +721,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); } + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + return 0; return 1; } else return xdr_ressize_check(rqstp, p); @@ -730,6 +733,7 @@ int nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; *p++ = resp->status; p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -746,6 +750,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); } + if (svc_encode_result_payload(rqstp, head->iov_len, + resp->count)) + return 0; return 1; } else return xdr_ressize_check(rqstp, p); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7e24fb3ca36e..1d2c33cd3a87 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3756,8 +3756,8 @@ static __be32 nfsd4_encode_splice_read( { struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; + int status, space_left; u32 eof; - int space_left; __be32 nfserr; __be32 *p = xdr->p - 2; @@ -3768,14 +3768,13 @@ static __be32 nfsd4_encode_splice_read( nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, file, read->rd_offset, &maxcount, &eof); read->rd_length = maxcount; - if (nfserr) { - /* - * nfsd_splice_actor may have already messed with the - * page length; reset it so as not to confuse - * xdr_truncate_encode: - */ - buf->page_len = 0; - return nfserr; + if (nfserr) + goto out_err; + status = svc_encode_result_payload(read->rd_rqstp, + buf->head[0].iov_len, maxcount); + if (status) { + nfserr = nfserrno(status); + goto out_err; } *(p++) = htonl(eof); @@ -3806,6 +3805,15 @@ static __be32 nfsd4_encode_splice_read( xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; + +out_err: + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode in our caller. + */ + buf->page_len = 0; + return nfserr; } static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, @@ -3897,6 +3905,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int zero = 0; struct xdr_stream *xdr = &resp->xdr; int length_offset = xdr->buf->len; + int status; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -3917,9 +3926,13 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd (char *)p, &maxcount); if (nfserr == nfserr_isdir) nfserr = nfserr_inval; - if (nfserr) { - xdr_truncate_encode(xdr, length_offset); - return nfserr; + if (nfserr) + goto out_err; + status = svc_encode_result_payload(readlink->rl_rqstp, length_offset, + maxcount); + if (status) { + nfserr = nfserrno(status); + goto out_err; } wire_count = htonl(maxcount); @@ -3929,6 +3942,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, 4 - (maxcount&3)); return 0; + +out_err: + xdr_truncate_encode(xdr, length_offset); + return nfserr; } static __be32 diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 8a288c8fcd57..9e00a902113e 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -469,6 +469,7 @@ int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readlinkres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; *p++ = resp->status; if (resp->status != nfs_ok) @@ -483,6 +484,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); } + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len)) + return 0; return 1; } @@ -490,6 +493,7 @@ int nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; *p++ = resp->status; if (resp->status != nfs_ok) @@ -507,6 +511,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); } + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count)) + return 0; return 1; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c8411b4f3492..d6436c13d5c4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -448,7 +448,6 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src, * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list * @rctxt: Reply context with information about the RPC Call * @sctxt: Send context for the RPC Reply - * @length: size in bytes of the payload in the first Write chunk * * The client provides a Write chunk list in the Call message. Fill * in the segments in the first Write chunk in the Reply's transport @@ -465,12 +464,12 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src, */ static ssize_t svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, - struct svc_rdma_send_ctxt *sctxt, - unsigned int length) + struct svc_rdma_send_ctxt *sctxt) { ssize_t len, ret; - ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length); + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, + rctxt->rc_read_payload_length); if (ret < 0) return ret; len = ret; @@ -923,21 +922,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err0; if (wr_lst) { /* XXX: Presume the client sent only one Write chunk */ - unsigned long offset; - unsigned int length; - - if (rctxt->rc_read_payload_length) { - offset = rctxt->rc_read_payload_offset; - length = rctxt->rc_read_payload_length; - } else { - offset = xdr->head[0].iov_len; - length = xdr->page_len; - } - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset, - length); + ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, + rctxt->rc_read_payload_offset, + rctxt->rc_read_payload_length); if (ret < 0) goto err2; - if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0) + if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) goto err0; } else { if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) From f6ad77590a5d432589a5d8a211c4e8e50cd8bb63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: [PATCH 007/131] svcrdma: Post RDMA Writes while XDR encoding replies The only RPC/RDMA ordering requirement between RDMA Writes and RDMA Sends is that the responder must post the Writes on the Send queue before posting the Send that conveys the RPC Reply for that Write payload. The Linux NFS server implementation now has a transport method that can post result Payload Writes earlier than svc_rdma_sendto: ->xpo_result_payload() This gets RDMA Writes going earlier so they are more likely to be complete at the remote end before the Send completes. Some care must be taken with pulled-up Replies. We don't want to push the Write chunk and then send the same payload data via Send. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 +- net/sunrpc/xprtrdma/svc_rdma_rw.c | 52 ++++++++++++++++++----- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 60 +++++++++++++++------------ 3 files changed, 77 insertions(+), 39 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2b870a3f391b..f5a3c852bb90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -183,9 +183,7 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, struct xdr_buf *xdr, - unsigned int offset, - unsigned long length); + __be32 *wr_ch, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index a75f8a133bef..f2ed1bf50251 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -538,13 +538,49 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info, length); } +/** + * svc_rdma_xb_write - Construct RDMA Writes to write an xdr_buf + * @xdr: xdr_buf to write + * @info: pointer to write arguments + * + * Returns: + * On succes, returns zero + * %-E2BIG if the client-provided Write chunk is too small + * %-ENOMEM if a resource has been exhausted + * %-EIO if an rdma-rw error occurred + */ +static int svc_rdma_xb_write(const struct xdr_buf *xdr, + struct svc_rdma_write_info *info) +{ + int ret; + + if (xdr->head[0].iov_len) { + ret = svc_rdma_iov_write(info, &xdr->head[0]); + if (ret < 0) + return ret; + } + + if (xdr->page_len) { + ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len, + xdr->page_len); + if (ret < 0) + return ret; + } + + if (xdr->tail[0].iov_len) { + ret = svc_rdma_iov_write(info, &xdr->tail[0]); + if (ret < 0) + return ret; + } + + return xdr->len; +} + /** * svc_rdma_send_write_chunk - Write all segments in a Write chunk * @rdma: controlling RDMA transport * @wr_ch: Write chunk provided by client * @xdr: xdr_buf containing the data payload - * @offset: payload's byte offset in @xdr - * @length: size of payload, in bytes * * Returns a non-negative number of bytes the chunk consumed, or * %-E2BIG if the payload was larger than the Write chunk, @@ -554,21 +590,17 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info, * %-EIO if rdma_rw initialization failed (DMA mapping, etc). */ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, - struct xdr_buf *xdr, - unsigned int offset, unsigned long length) + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; int ret; - if (!length) - return 0; - info = svc_rdma_write_info_alloc(rdma, wr_ch); if (!info) return -ENOMEM; - ret = svc_rdma_pages_write(info, xdr, offset, length); - if (ret < 0) + ret = svc_rdma_xb_write(xdr, info); + if (ret != xdr->len) goto out_err; ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); @@ -576,7 +608,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, goto out_err; trace_svcrdma_send_write_chunk(xdr->page_len); - return length; + return xdr->len; out_err: svc_rdma_write_info_free(info); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d6436c13d5c4..e8b0d030e1e6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -468,11 +468,14 @@ svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, { ssize_t len, ret; - ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, - rctxt->rc_read_payload_length); - if (ret < 0) - return ret; - len = ret; + len = 0; + if (rctxt->rc_write_list) { + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, + rctxt->rc_read_payload_length); + if (ret < 0) + return ret; + len = ret; + } /* Terminate the Write list */ ret = xdr_stream_encode_item_absent(&sctxt->sc_stream); @@ -556,11 +559,13 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr) { + bool write_chunk_present = rctxt && rctxt->rc_write_list; int elements; /* For small messages, copying bytes is cheaper than DMA mapping. */ - if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) + if (!write_chunk_present && + sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) return true; /* Check whether the xdr_buf has more elements than can @@ -893,9 +898,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) container_of(xprt, struct svcxprt_rdma, sc_xprt); struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; __be32 *rdma_argp = rctxt->rc_recv_buf; - __be32 *wr_lst = rctxt->rc_write_list; __be32 *rp_ch = rctxt->rc_reply_chunk; - struct xdr_buf *xdr = &rqstp->rq_res; struct svc_rdma_send_ctxt *sctxt; __be32 *p; int ret; @@ -920,19 +923,8 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (svc_rdma_encode_read_list(sctxt) < 0) goto err0; - if (wr_lst) { - /* XXX: Presume the client sent only one Write chunk */ - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, - rctxt->rc_read_payload_offset, - rctxt->rc_read_payload_length); - if (ret < 0) - goto err2; - if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) - goto err0; - } else { - if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) - goto err0; - } + if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) + goto err0; if (rp_ch) { ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); if (ret < 0) @@ -974,16 +966,25 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) * @offset: payload's byte offset in @xdr * @length: size of payload, in bytes * - * Returns zero on success. - * - * For the moment, just record the xdr_buf location of the result - * payload. svc_rdma_sendto will use that location later when - * we actually send the payload. + * Return values: + * %0 if successful or nothing needed to be done + * %-EMSGSIZE on XDR buffer overflow + * %-E2BIG if the payload was larger than the Write chunk + * %-EINVAL if client provided too many segments + * %-ENOMEM if rdma_rw context pool was exhausted + * %-ENOTCONN if posting failed (connection is lost) + * %-EIO if rdma_rw initialization failed (DMA mapping, etc) */ int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length) { struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; + struct svcxprt_rdma *rdma; + struct xdr_buf subbuf; + int ret; + + if (!rctxt->rc_write_list || !length) + return 0; /* XXX: Just one READ payload slot for now, since our * transport implementation currently supports only one @@ -992,5 +993,12 @@ int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, rctxt->rc_read_payload_offset = offset; rctxt->rc_read_payload_length = length; + if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length)) + return -EMSGSIZE; + + rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); + ret = svc_rdma_send_write_chunk(rdma, rctxt->rc_write_list, &subbuf); + if (ret < 0) + return ret; return 0; } From ded380f10072c924a17be6ac996019ff6472c9d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: [PATCH 008/131] svcrdma: Clean up svc_rdma_encode_reply_chunk() Refactor: Match the control flow of svc_rdma_encode_write_list(). Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index f2ed1bf50251..e6050230b49f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -635,6 +635,9 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, struct svc_rdma_write_info *info; int consumed, ret; + if (!rctxt->rc_reply_chunk) + return 0; + info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk); if (!info) return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index e8b0d030e1e6..922dbe573570 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -504,6 +504,9 @@ svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt, struct svc_rdma_send_ctxt *sctxt, unsigned int length) { + if (!rctxt->rc_reply_chunk) + return xdr_stream_encode_item_absent(&sctxt->sc_stream); + return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt, length); } @@ -898,7 +901,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) container_of(xprt, struct svcxprt_rdma, sc_xprt); struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; __be32 *rdma_argp = rctxt->rc_recv_buf; - __be32 *rp_ch = rctxt->rc_reply_chunk; struct svc_rdma_send_ctxt *sctxt; __be32 *p; int ret; @@ -916,25 +918,22 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) rpcrdma_fixed_maxsz * sizeof(*p)); if (!p) goto err0; + + ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); + if (ret < 0) + goto err2; + *p++ = *rdma_argp; *p++ = *(rdma_argp + 1); *p++ = rdma->sc_fc_credits; - *p = rp_ch ? rdma_nomsg : rdma_msg; + *p = rctxt->rc_reply_chunk ? rdma_nomsg : rdma_msg; if (svc_rdma_encode_read_list(sctxt) < 0) goto err0; if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) goto err0; - if (rp_ch) { - ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); - if (ret < 0) - goto err2; - if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0) - goto err0; - } else { - if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) - goto err0; - } + if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0) + goto err0; ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) From 78147ca8b4a9b6cf0e597ddd6bf17959e08376c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Jun 2020 10:15:41 -0400 Subject: [PATCH 009/131] svcrdma: Add a "parsed chunk list" data structure This simple data structure binds the location of each data payload inside of an RPC message to the chunk that will be used to push it to or pull it from the client. There are several benefits to this small additional overhead: * It enables support for more than one chunk in incoming Read and Write lists. * It translates the version-specific on-the-wire format into a generic in-memory structure, enabling support for multiple versions of the RPC/RDMA transport protocol. * It enables the server to re-organize a chunk list if it needs to adjust where Read chunk data lands in server memory without altering the contents of the XDR-encoded Receive buffer. Construction of these lists is done while sanity checking each incoming RPC/RDMA header. Subsequent patches will make use of the generated data structures. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 12 + include/linux/sunrpc/svc_rdma_pcl.h | 128 ++++++++++ include/trace/events/rpcrdma.h | 75 +++++- net/sunrpc/xprtrdma/Makefile | 2 +- net/sunrpc/xprtrdma/svc_rdma_pcl.c | 306 ++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 196 ++++++++------- 6 files changed, 635 insertions(+), 84 deletions(-) create mode 100644 include/linux/sunrpc/svc_rdma_pcl.h create mode 100644 net/sunrpc/xprtrdma/svc_rdma_pcl.c diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f5a3c852bb90..a89d4209fe2a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -47,6 +47,8 @@ #include #include #include +#include + #include #include @@ -142,8 +144,18 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + __be32 *rc_write_list; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + __be32 *rc_reply_chunk; + struct svc_rdma_pcl rc_reply_pcl; + unsigned int rc_read_payload_offset; unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h new file mode 100644 index 000000000000..7516ad0fae80 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma_pcl.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Oracle and/or its affiliates + */ + +#ifndef SVC_RDMA_PCL_H +#define SVC_RDMA_PCL_H + +#include + +struct svc_rdma_segment { + u32 rs_handle; + u32 rs_length; + u64 rs_offset; +}; + +struct svc_rdma_chunk { + struct list_head ch_list; + + u32 ch_position; + u32 ch_length; + u32 ch_payload_length; + + u32 ch_segcount; + struct svc_rdma_segment ch_segments[]; +}; + +struct svc_rdma_pcl { + unsigned int cl_count; + struct list_head cl_chunks; +}; + +/** + * pcl_init - Initialize a parsed chunk list + * @pcl: parsed chunk list to initialize + * + */ +static inline void pcl_init(struct svc_rdma_pcl *pcl) +{ + INIT_LIST_HEAD(&pcl->cl_chunks); +} + +/** + * pcl_is_empty - Return true if parsed chunk list is empty + * @pcl: parsed chunk list + * + */ +static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl) +{ + return list_empty(&pcl->cl_chunks); +} + +/** + * pcl_first_chunk - Return first chunk in a parsed chunk list + * @pcl: parsed chunk list + * + * Returns the first chunk in the list, or NULL if the list is empty. + */ +static inline struct svc_rdma_chunk * +pcl_first_chunk(const struct svc_rdma_pcl *pcl) +{ + if (pcl_is_empty(pcl)) + return NULL; + return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk, + ch_list); +} + +/** + * pcl_next_chunk - Return next chunk in a parsed chunk list + * @pcl: a parsed chunk list + * @chunk: chunk in @pcl + * + * Returns the next chunk in the list, or NULL if @chunk is already last. + */ +static inline struct svc_rdma_chunk * +pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk) +{ + if (list_is_last(&chunk->ch_list, &pcl->cl_chunks)) + return NULL; + return list_next_entry(chunk, ch_list); +} + +/** + * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list + * @pos: the loop cursor + * @pcl: a parsed chunk list + */ +#define pcl_for_each_chunk(pos, pcl) \ + for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \ + &pos->ch_list != &(pcl)->cl_chunks; \ + pos = list_next_entry(pos, ch_list)) + +/** + * pcl_for_each_segment - Iterate over segments in a parsed chunk + * @pos: the loop cursor + * @chunk: a parsed chunk + */ +#define pcl_for_each_segment(pos, chunk) \ + for (pos = &(chunk)->ch_segments[0]; \ + pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \ + pos++) + +/** + * pcl_chunk_end_offset - Return offset of byte range following @chunk + * @chunk: chunk in @pcl + * + * Returns starting offset of the region just after @chunk + */ +static inline unsigned int +pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk) +{ + return xdr_align_size(chunk->ch_position + chunk->ch_payload_length); +} + +struct svc_rdma_recv_ctxt; + +extern void pcl_free(struct svc_rdma_pcl *pcl); +extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p); +extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, + void *), + void *data); + +#endif /* SVC_RDMA_PCL_H */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index bf1065772228..72b941aef43b 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1446,12 +1446,83 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event, ), \ TP_ARGS(handle, length, offset)) -DEFINE_SEGMENT_EVENT(decode_wseg); -DEFINE_SEGMENT_EVENT(encode_rseg); DEFINE_SEGMENT_EVENT(send_rseg); DEFINE_SEGMENT_EVENT(encode_wseg); DEFINE_SEGMENT_EVENT(send_wseg); +TRACE_EVENT(svcrdma_decode_rseg, + TP_PROTO( + const struct rpc_rdma_cid *cid, + const struct svc_rdma_chunk *chunk, + const struct svc_rdma_segment *segment + ), + + TP_ARGS(cid, chunk, segment), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(u32, segno) + __field(u32, position) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->segno = chunk->ch_segcount; + __entry->position = chunk->ch_position; + __entry->handle = segment->rs_handle; + __entry->length = segment->rs_length; + __entry->offset = segment->rs_offset; + ), + + TP_printk("cq_id=%u cid=%d segno=%u position=%u %u@0x%016llx:0x%08x", + __entry->cq_id, __entry->completion_id, + __entry->segno, __entry->position, __entry->length, + (unsigned long long)__entry->offset, __entry->handle + ) +); + +TRACE_EVENT(svcrdma_decode_wseg, + TP_PROTO( + const struct rpc_rdma_cid *cid, + const struct svc_rdma_chunk *chunk, + u32 segno + ), + + TP_ARGS(cid, chunk, segno), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(u32, segno) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + const struct svc_rdma_segment *segment = + &chunk->ch_segments[segno]; + + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->segno = segno; + __entry->handle = segment->rs_handle; + __entry->length = segment->rs_length; + __entry->offset = segment->rs_offset; + ), + + TP_printk("cq_id=%u cid=%d segno=%u %u@0x%016llx:0x%08x", + __entry->cq_id, __entry->completion_id, + __entry->segno, __entry->length, + (unsigned long long)__entry->offset, __entry->handle + ) +); + DECLARE_EVENT_CLASS(svcrdma_chunk_event, TP_PROTO( u32 length diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 8ed0377d7a18..55b21bae866d 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ - module.o + svc_rdma_pcl.o module.o rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o diff --git a/net/sunrpc/xprtrdma/svc_rdma_pcl.c b/net/sunrpc/xprtrdma/svc_rdma_pcl.c new file mode 100644 index 000000000000..b63cfeaa2923 --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_pcl.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Oracle. All rights reserved. + */ + +#include +#include + +#include "xprt_rdma.h" +#include + +/** + * pcl_free - Release all memory associated with a parsed chunk list + * @pcl: parsed chunk list + * + */ +void pcl_free(struct svc_rdma_pcl *pcl) +{ + while (!list_empty(&pcl->cl_chunks)) { + struct svc_rdma_chunk *chunk; + + chunk = pcl_first_chunk(pcl); + list_del(&chunk->ch_list); + kfree(chunk); + } +} + +static struct svc_rdma_chunk *pcl_alloc_chunk(u32 segcount, u32 position) +{ + struct svc_rdma_chunk *chunk; + + chunk = kmalloc(struct_size(chunk, ch_segments, segcount), GFP_KERNEL); + if (!chunk) + return NULL; + + chunk->ch_position = position; + chunk->ch_length = 0; + chunk->ch_payload_length = 0; + chunk->ch_segcount = 0; + return chunk; +} + +static struct svc_rdma_chunk * +pcl_lookup_position(struct svc_rdma_pcl *pcl, u32 position) +{ + struct svc_rdma_chunk *pos; + + pcl_for_each_chunk(pos, pcl) { + if (pos->ch_position == position) + return pos; + } + return NULL; +} + +static void pcl_insert_position(struct svc_rdma_pcl *pcl, + struct svc_rdma_chunk *chunk) +{ + struct svc_rdma_chunk *pos; + + pcl_for_each_chunk(pos, pcl) { + if (pos->ch_position > chunk->ch_position) + break; + } + __list_add(&chunk->ch_list, pos->ch_list.prev, &pos->ch_list); + pcl->cl_count++; +} + +static void pcl_set_read_segment(const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_chunk *chunk, + u32 handle, u32 length, u64 offset) +{ + struct svc_rdma_segment *segment; + + segment = &chunk->ch_segments[chunk->ch_segcount]; + segment->rs_handle = handle; + segment->rs_length = length; + segment->rs_offset = offset; + + trace_svcrdma_decode_rseg(&rctxt->rc_cid, chunk, segment); + + chunk->ch_length += length; + chunk->ch_segcount++; +} + +/** + * pcl_alloc_call - Construct a parsed chunk list for the Call body + * @rctxt: Ingress receive context + * @p: Start of an un-decoded Read list + * + * Assumptions: + * - The incoming Read list has already been sanity checked. + * - cl_count is already set to the number of segments in + * the un-decoded list. + * - The list might not be in order by position. + * + * Return values: + * %true: Parsed chunk list was successfully constructed, and + * cl_count is updated to be the number of chunks (ie. + * unique positions) in the Read list. + * %false: Memory allocation failed. + */ +bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) +{ + struct svc_rdma_pcl *pcl = &rctxt->rc_call_pcl; + unsigned int i, segcount = pcl->cl_count; + + pcl->cl_count = 0; + for (i = 0; i < segcount; i++) { + struct svc_rdma_chunk *chunk; + u32 position, handle, length; + u64 offset; + + p++; /* skip the list discriminator */ + p = xdr_decode_read_segment(p, &position, &handle, + &length, &offset); + if (position != 0) + continue; + + if (pcl_is_empty(pcl)) { + chunk = pcl_alloc_chunk(segcount, position); + if (!chunk) + return false; + pcl_insert_position(pcl, chunk); + } else { + chunk = list_first_entry(&pcl->cl_chunks, + struct svc_rdma_chunk, + ch_list); + } + + pcl_set_read_segment(rctxt, chunk, handle, length, offset); + } + + return true; +} + +/** + * pcl_alloc_read - Construct a parsed chunk list for normal Read chunks + * @rctxt: Ingress receive context + * @p: Start of an un-decoded Read list + * + * Assumptions: + * - The incoming Read list has already been sanity checked. + * - cl_count is already set to the number of segments in + * the un-decoded list. + * - The list might not be in order by position. + * + * Return values: + * %true: Parsed chunk list was successfully constructed, and + * cl_count is updated to be the number of chunks (ie. + * unique position values) in the Read list. + * %false: Memory allocation failed. + * + * TODO: + * - Check for chunk range overlaps + */ +bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) +{ + struct svc_rdma_pcl *pcl = &rctxt->rc_read_pcl; + unsigned int i, segcount = pcl->cl_count; + + pcl->cl_count = 0; + for (i = 0; i < segcount; i++) { + struct svc_rdma_chunk *chunk; + u32 position, handle, length; + u64 offset; + + p++; /* skip the list discriminator */ + p = xdr_decode_read_segment(p, &position, &handle, + &length, &offset); + if (position == 0) + continue; + + chunk = pcl_lookup_position(pcl, position); + if (!chunk) { + chunk = pcl_alloc_chunk(segcount, position); + if (!chunk) + return false; + pcl_insert_position(pcl, chunk); + } + + pcl_set_read_segment(rctxt, chunk, handle, length, offset); + } + + return true; +} + +/** + * pcl_alloc_write - Construct a parsed chunk list from a Write list + * @rctxt: Ingress receive context + * @pcl: Parsed chunk list to populate + * @p: Start of an un-decoded Write list + * + * Assumptions: + * - The incoming Write list has already been sanity checked, and + * - cl_count is set to the number of chunks in the un-decoded list. + * + * Return values: + * %true: Parsed chunk list was successfully constructed. + * %false: Memory allocation failed. + */ +bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p) +{ + struct svc_rdma_segment *segment; + struct svc_rdma_chunk *chunk; + unsigned int i, j; + u32 segcount; + + for (i = 0; i < pcl->cl_count; i++) { + p++; /* skip the list discriminator */ + segcount = be32_to_cpup(p++); + + chunk = pcl_alloc_chunk(segcount, 0); + if (!chunk) + return false; + list_add_tail(&chunk->ch_list, &pcl->cl_chunks); + + for (j = 0; j < segcount; j++) { + segment = &chunk->ch_segments[j]; + p = xdr_decode_rdma_segment(p, &segment->rs_handle, + &segment->rs_length, + &segment->rs_offset); + trace_svcrdma_decode_wseg(&rctxt->rc_cid, chunk, j); + + chunk->ch_length += segment->rs_length; + chunk->ch_segcount++; + } + } + return true; +} + +static int pcl_process_region(const struct xdr_buf *xdr, + unsigned int offset, unsigned int length, + int (*actor)(const struct xdr_buf *, void *), + void *data) +{ + struct xdr_buf subbuf; + + if (!length) + return 0; + if (xdr_buf_subsegment(xdr, &subbuf, offset, length)) + return -EMSGSIZE; + return actor(&subbuf, data); +} + +/** + * pcl_process_nonpayloads - Process non-payload regions inside @xdr + * @pcl: Chunk list to process + * @xdr: xdr_buf to process + * @actor: Function to invoke on each non-payload region + * @data: Arguments for @actor + * + * This mechanism must ignore not only result payloads that were already + * sent via RDMA Write, but also XDR padding for those payloads that + * the upper layer has added. + * + * Assumptions: + * The xdr->len and ch_position fields are aligned to 4-byte multiples. + * + * Returns: + * On success, zero, + * %-EMSGSIZE on XDR buffer overflow, or + * The return value of @actor + */ +int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, void *), + void *data) +{ + struct svc_rdma_chunk *chunk, *next; + unsigned int start; + int ret; + + chunk = pcl_first_chunk(pcl); + + /* No result payloads were generated */ + if (!chunk || !chunk->ch_payload_length) + return actor(xdr, data); + + /* Process the region before the first result payload */ + ret = pcl_process_region(xdr, 0, chunk->ch_position, actor, data); + if (ret < 0) + return ret; + + /* Process the regions between each middle result payload */ + while ((next = pcl_next_chunk(pcl, chunk))) { + if (!next->ch_payload_length) + break; + + start = pcl_chunk_end_offset(chunk); + ret = pcl_process_region(xdr, start, next->ch_position - start, + actor, data); + if (ret < 0) + return ret; + + chunk = next; + } + + /* Process the region after the last result payload */ + start = pcl_chunk_end_offset(chunk); + ret = pcl_process_region(xdr, start, xdr->len - start, actor, data); + if (ret < 0) + return ret; + + return 0; +} diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c6ea2903c21a..ec9d259b149c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -93,6 +93,7 @@ * (see rdma_read_complete() below). */ +#include #include #include #include @@ -143,6 +144,10 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) goto fail2; svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); + pcl_init(&ctxt->rc_call_pcl); + pcl_init(&ctxt->rc_read_pcl); + pcl_init(&ctxt->rc_write_pcl); + pcl_init(&ctxt->rc_reply_pcl); ctxt->rc_recv_wr.next = NULL; ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; @@ -226,6 +231,11 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, for (i = 0; i < ctxt->rc_page_count; i++) put_page(ctxt->rc_pages[i]); + pcl_free(&ctxt->rc_call_pcl); + pcl_free(&ctxt->rc_read_pcl); + pcl_free(&ctxt->rc_write_pcl); + pcl_free(&ctxt->rc_reply_pcl); + if (!ctxt->rc_temp) llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); else @@ -385,100 +395,123 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, arg->len = ctxt->rc_byte_len; } -/* This accommodates the largest possible Write chunk. - */ -#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) - -/* This accommodates the largest possible Position-Zero - * Read chunk or Reply chunk. - */ -#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) - -/* Sanity check the Read list. +/** + * xdr_count_read_segments - Count number of Read segments in Read list + * @rctxt: Ingress receive context + * @p: Start of an un-decoded Read list * - * Implementation limits: - * - This implementation supports only one Read chunk. + * Before allocating anything, ensure the ingress Read list is safe + * to use. * - * Sanity checks: - * - Read list does not overflow Receive buffer. - * - Segment size limited by largest NFS data payload. - * - * The segment count is limited to how many segments can - * fit in the transport header without overflowing the - * buffer. That's about 40 Read segments for a 1KB inline - * threshold. + * The segment count is limited to how many segments can fit in the + * transport header without overflowing the buffer. That's about 40 + * Read segments for a 1KB inline threshold. * * Return values: - * %true: Read list is valid. @rctxt's xdr_stream is updated - * to point to the first byte past the Read list. - * %false: Read list is corrupt. @rctxt's xdr_stream is left - * in an unknown state. + * %true: Read list is valid. @rctxt's xdr_stream is updated to point + * to the first byte past the Read list. rc_read_pcl and + * rc_call_pcl cl_count fields are set to the number of + * Read segments in the list. + * %false: Read list is corrupt. @rctxt's xdr_stream is left in an + * unknown state. */ -static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) +static bool xdr_count_read_segments(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) { - u32 position, len; - bool first; - __be32 *p; - - p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); - if (!p) - return false; - - len = 0; - first = true; + rctxt->rc_call_pcl.cl_count = 0; + rctxt->rc_read_pcl.cl_count = 0; while (xdr_item_is_present(p)) { + u32 position, handle, length; + u64 offset; + p = xdr_inline_decode(&rctxt->rc_stream, rpcrdma_readseg_maxsz * sizeof(*p)); if (!p) return false; - if (first) { - position = be32_to_cpup(p); - first = false; - } else if (be32_to_cpup(p) != position) { - return false; + xdr_decode_read_segment(p, &position, &handle, + &length, &offset); + if (position) { + if (position & 3) + return false; + ++rctxt->rc_read_pcl.cl_count; + } else { + ++rctxt->rc_call_pcl.cl_count; } - p += 2; - len += be32_to_cpup(p); p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; } - return len <= MAX_BYTES_SPECIAL_CHUNK; + return true; } -/* The segment count is limited to how many segments can - * fit in the transport header without overflowing the - * buffer. That's about 60 Write segments for a 1KB inline - * threshold. +/* Sanity check the Read list. + * + * Sanity checks: + * - Read list does not overflow Receive buffer. + * - Chunk size limited by largest NFS data payload. + * + * Return values: + * %true: Read list is valid. @rctxt's xdr_stream is updated + * to point to the first byte past the Read list. + * %false: Read list is corrupt. @rctxt's xdr_stream is left + * in an unknown state. */ -static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) +static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) { - u32 i, segcount, total; __be32 *p; p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; - segcount = be32_to_cpup(p); + if (!xdr_count_read_segments(rctxt, p)) + return false; + if (!pcl_alloc_call(rctxt, p)) + return false; + return pcl_alloc_read(rctxt, p); +} - total = 0; - for (i = 0; i < segcount; i++) { - u32 handle, length; - u64 offset; +static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt) +{ + u32 segcount; + __be32 *p; - p = xdr_inline_decode(&rctxt->rc_stream, - rpcrdma_segment_maxsz * sizeof(*p)); + if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount)) + return false; + + /* A bogus segcount causes this buffer overflow check to fail. */ + p = xdr_inline_decode(&rctxt->rc_stream, + segcount * rpcrdma_segment_maxsz * sizeof(*p)); + return p != NULL; +} + +/** + * xdr_count_write_chunks - Count number of Write chunks in Write list + * @rctxt: Received header and decoding state + * @p: start of an un-decoded Write list + * + * Before allocating anything, ensure the ingress Write list is + * safe to use. + * + * Return values: + * %true: Write list is valid. @rctxt's xdr_stream is updated + * to point to the first byte past the Write list, and + * the number of Write chunks is in rc_write_pcl.cl_count. + * %false: Write list is corrupt. @rctxt's xdr_stream is left + * in an indeterminate state. + */ +static bool xdr_count_write_chunks(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) +{ + rctxt->rc_write_pcl.cl_count = 0; + while (xdr_item_is_present(p)) { + if (!xdr_check_write_chunk(rctxt)) + return false; + ++rctxt->rc_write_pcl.cl_count; + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; - - xdr_decode_rdma_segment(p, &handle, &length, &offset); - trace_svcrdma_decode_wseg(handle, length, offset); - - total += length; } - return total <= maxlen; + return true; } /* Sanity check the Write list. @@ -498,24 +531,22 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) */ static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) { - u32 chcount = 0; __be32 *p; p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; - rctxt->rc_write_list = p; - while (xdr_item_is_present(p)) { - if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) - return false; - ++chcount; - p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); - if (!p) - return false; - } - if (!chcount) - rctxt->rc_write_list = NULL; - return chcount < 2; + + rctxt->rc_write_list = NULL; + if (!xdr_count_write_chunks(rctxt, p)) + return false; + if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p)) + return false; + + if (!pcl_is_empty(&rctxt->rc_write_pcl)) + rctxt->rc_write_list = p; + rctxt->rc_cur_result_payload = pcl_first_chunk(&rctxt->rc_write_pcl); + return rctxt->rc_write_pcl.cl_count < 2; } /* Sanity check the Reply chunk. @@ -537,13 +568,16 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; + rctxt->rc_reply_chunk = NULL; - if (xdr_item_is_present(p)) { - if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) - return false; - rctxt->rc_reply_chunk = p; - } - return true; + if (!xdr_item_is_present(p)) + return true; + if (!xdr_check_write_chunk(rctxt)) + return false; + + rctxt->rc_reply_chunk = p; + rctxt->rc_reply_pcl.cl_count = 1; + return pcl_alloc_write(rctxt, &rctxt->rc_reply_pcl, p); } /* RPC-over-RDMA Version One private extension: Remote Invalidation. From eb3de6a49dd587d6670c404769561f3e283b71e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Jun 2020 09:46:05 -0400 Subject: [PATCH 010/131] svcrdma: Use parsed chunk lists to derive the inv_rkey Refactor: Don't duplicate header decoding smarts here. Instead, use the new parsed chunk lists. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 69 +++++++++++-------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ec9d259b149c..2755ca178b09 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -586,60 +586,53 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) * * If there is exactly one distinct R_key in the received transport * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero. - * - * Perform this operation while the received transport header is - * still in the CPU cache. */ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt) { - __be32 inv_rkey, *p; - u32 i, segcount; + struct svc_rdma_segment *segment; + struct svc_rdma_chunk *chunk; + u32 inv_rkey; ctxt->rc_inv_rkey = 0; if (!rdma->sc_snd_w_inv) return; - inv_rkey = xdr_zero; - p = ctxt->rc_recv_buf; - p += rpcrdma_fixed_maxsz; - - /* Read list */ - while (xdr_item_is_present(p++)) { - p++; /* position */ - if (inv_rkey == xdr_zero) - inv_rkey = *p; - else if (inv_rkey != *p) - return; - p += 4; - } - - /* Write list */ - while (xdr_item_is_present(p++)) { - segcount = be32_to_cpup(p++); - for (i = 0; i < segcount; i++) { - if (inv_rkey == xdr_zero) - inv_rkey = *p; - else if (inv_rkey != *p) + inv_rkey = 0; + pcl_for_each_chunk(chunk, &ctxt->rc_call_pcl) { + pcl_for_each_segment(segment, chunk) { + if (inv_rkey == 0) + inv_rkey = segment->rs_handle; + else if (inv_rkey != segment->rs_handle) return; - p += 4; } } - - /* Reply chunk */ - if (xdr_item_is_present(p++)) { - segcount = be32_to_cpup(p++); - for (i = 0; i < segcount; i++) { - if (inv_rkey == xdr_zero) - inv_rkey = *p; - else if (inv_rkey != *p) + pcl_for_each_chunk(chunk, &ctxt->rc_read_pcl) { + pcl_for_each_segment(segment, chunk) { + if (inv_rkey == 0) + inv_rkey = segment->rs_handle; + else if (inv_rkey != segment->rs_handle) return; - p += 4; } } - - ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); + pcl_for_each_chunk(chunk, &ctxt->rc_write_pcl) { + pcl_for_each_segment(segment, chunk) { + if (inv_rkey == 0) + inv_rkey = segment->rs_handle; + else if (inv_rkey != segment->rs_handle) + return; + } + } + pcl_for_each_chunk(chunk, &ctxt->rc_reply_pcl) { + pcl_for_each_segment(segment, chunk) { + if (inv_rkey == 0) + inv_rkey = segment->rs_handle; + else if (inv_rkey != segment->rs_handle) + return; + } + } + ctxt->rc_inv_rkey = inv_rkey; } /** From 58b2e0fefa891c99f297120c8c062a35005dc562 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Mar 2020 13:06:55 -0400 Subject: [PATCH 011/131] svcrdma: Use parsed chunk lists to detect reverse direction replies Refactor: Don't duplicate header decoding smarts here. Instead, use the new parsed chunk lists. Note that the XID sanity test is also removed. The XID is already looked up by the cb handler, and is rejected if it's not recognized. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 29 ++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a89d4209fe2a..74247a33b6c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -144,6 +144,7 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + __be32 rc_msgtype; struct svc_rdma_pcl rc_call_pcl; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2755ca178b09..72b07e8aa3c9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -668,7 +668,8 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, if (*p != rpcrdma_version) goto out_version; p += 2; - switch (*p) { + rctxt->rc_msgtype = *p; + switch (rctxt->rc_msgtype) { case rdma_msg: break; case rdma_nomsg: @@ -762,30 +763,28 @@ static void svc_rdma_send_error(struct svcxprt_rdma *rdma, * the RPC/RDMA header small and fixed in size, so it is * straightforward to check the RPC header's direction field. */ -static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, - __be32 *rdma_resp) +static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, + struct svc_rdma_recv_ctxt *rctxt) { - __be32 *p; + __be32 *p = rctxt->rc_recv_buf; if (!xprt->xpt_bc_xprt) return false; - p = rdma_resp + 3; - if (*p++ != rdma_msg) + if (rctxt->rc_msgtype != rdma_msg) return false; - if (*p++ != xdr_zero) + if (!pcl_is_empty(&rctxt->rc_call_pcl)) return false; - if (*p++ != xdr_zero) + if (!pcl_is_empty(&rctxt->rc_read_pcl)) return false; - if (*p++ != xdr_zero) + if (!pcl_is_empty(&rctxt->rc_write_pcl)) + return false; + if (!pcl_is_empty(&rctxt->rc_reply_pcl)) return false; - /* XID sanity */ - if (*p++ != *rdma_resp) - return false; - /* call direction */ - if (*p == cpu_to_be32(RPC_CALL)) + /* RPC call direction */ + if (*(p + 8) == cpu_to_be32(RPC_CALL)) return false; return true; @@ -868,7 +867,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, p)) + if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) goto out_backchannel; svc_rdma_get_inv_rkey(rdma_xprt, ctxt); From 7a1cbfa18059a40d4752dab057384c3ca2de326c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 11:07:00 -0400 Subject: [PATCH 012/131] svcrdma: Use parsed chunk lists to construct RDMA Writes Refactor: Instead of re-parsing the ingress RPC Call transport header when constructing RDMA Writes, use the new parsed chunk lists for the Write list and Reply chunk, which are version-agnostic and already XDR-decoded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 ++- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 1 - net/sunrpc/xprtrdma/svc_rdma_rw.c | 47 ++++++++++++------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 22 +++++++----- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 74247a33b6c6..d9148787efff 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -157,8 +157,6 @@ struct svc_rdma_recv_ctxt { __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; - unsigned int rc_read_payload_offset; - unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; }; @@ -196,7 +194,8 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, const struct xdr_buf *xdr); + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 72b07e8aa3c9..7d44e9d2e7a3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -207,7 +207,6 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) out: ctxt->rc_page_count = 0; - ctxt->rc_read_payload_length = 0; return ctxt; out_empty: diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index e6050230b49f..ed7def7b801b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -190,11 +190,11 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, * - Stores arguments for the SGL constructor functions */ struct svc_rdma_write_info { + const struct svc_rdma_chunk *wi_chunk; + /* write state of this chunk */ unsigned int wi_seg_off; unsigned int wi_seg_no; - unsigned int wi_nsegs; - __be32 *wi_segs; /* SGL constructor arguments */ const struct xdr_buf *wi_xdr; @@ -205,7 +205,8 @@ struct svc_rdma_write_info { }; static struct svc_rdma_write_info * -svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk) +svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, + const struct svc_rdma_chunk *chunk) { struct svc_rdma_write_info *info; @@ -213,10 +214,9 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk) if (!info) return info; + info->wi_chunk = chunk; info->wi_seg_off = 0; info->wi_seg_no = 0; - info->wi_nsegs = be32_to_cpup(++chunk); - info->wi_segs = ++chunk; svc_rdma_cc_init(rdma, &info->wi_cc); info->wi_cc.cc_cqe.done = svc_rdma_write_done; return info; @@ -443,23 +443,19 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, { struct svc_rdma_chunk_ctxt *cc = &info->wi_cc; struct svcxprt_rdma *rdma = cc->cc_rdma; + const struct svc_rdma_segment *seg; struct svc_rdma_rw_ctxt *ctxt; - __be32 *seg; int ret; - seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz; do { unsigned int write_len; - u32 handle, length; u64 offset; - if (info->wi_seg_no >= info->wi_nsegs) + seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; + if (!seg) goto out_overflow; - xdr_decode_rdma_segment(seg, &handle, &length, &offset); - offset += info->wi_seg_off; - - write_len = min(remaining, length - info->wi_seg_off); + write_len = min(remaining, seg->rs_length - info->wi_seg_off); if (!write_len) goto out_overflow; ctxt = svc_rdma_get_rw_ctxt(rdma, @@ -468,17 +464,17 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return -ENOMEM; constructor(info, write_len, ctxt); - ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle, + offset = seg->rs_offset + info->wi_seg_off; + ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, seg->rs_handle, DMA_TO_DEVICE); if (ret < 0) return -EIO; - trace_svcrdma_send_wseg(handle, write_len, offset); + trace_svcrdma_send_wseg(seg->rs_handle, write_len, offset); list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; - if (write_len == length - info->wi_seg_off) { - seg += 4; + if (write_len == seg->rs_length - info->wi_seg_off) { info->wi_seg_no++; info->wi_seg_off = 0; } else { @@ -491,7 +487,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, out_overflow: trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, - info->wi_nsegs); + info->wi_chunk->ch_segcount); return -E2BIG; } @@ -579,7 +575,7 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, /** * svc_rdma_send_write_chunk - Write all segments in a Write chunk * @rdma: controlling RDMA transport - * @wr_ch: Write chunk provided by client + * @chunk: Write chunk provided by the client * @xdr: xdr_buf containing the data payload * * Returns a non-negative number of bytes the chunk consumed, or @@ -589,13 +585,14 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, * %-ENOTCONN if posting failed (connection is lost), * %-EIO if rdma_rw initialization failed (DMA mapping, etc). */ -int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, +int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_chunk *chunk, const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; int ret; - info = svc_rdma_write_info_alloc(rdma, wr_ch); + info = svc_rdma_write_info_alloc(rdma, chunk); if (!info) return -ENOMEM; @@ -633,12 +630,14 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, struct xdr_buf *xdr) { struct svc_rdma_write_info *info; + struct svc_rdma_chunk *chunk; int consumed, ret; - if (!rctxt->rc_reply_chunk) + if (pcl_is_empty(&rctxt->rc_reply_pcl)) return 0; - info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk); + chunk = pcl_first_chunk(&rctxt->rc_reply_pcl); + info = svc_rdma_write_info_alloc(rdma, chunk); if (!info) return -ENOMEM; @@ -650,7 +649,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, /* Send the page list in the Reply chunk only if the * client did not provide Write chunks. */ - if (!rctxt->rc_write_list && xdr->page_len) { + if (pcl_is_empty(&rctxt->rc_write_pcl) && xdr->page_len) { ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len, xdr->page_len); if (ret < 0) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 922dbe573570..9a6d3dee69ed 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -466,12 +466,14 @@ static ssize_t svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, struct svc_rdma_send_ctxt *sctxt) { + struct svc_rdma_chunk *chunk; ssize_t len, ret; len = 0; if (rctxt->rc_write_list) { + chunk = pcl_first_chunk(&rctxt->rc_write_pcl); ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, - rctxt->rc_read_payload_length); + chunk->ch_payload_length); if (ret < 0) return ret; len = ret; @@ -978,25 +980,27 @@ int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length) { struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; + struct svc_rdma_chunk *chunk; struct svcxprt_rdma *rdma; struct xdr_buf subbuf; int ret; - if (!rctxt->rc_write_list || !length) + chunk = rctxt->rc_cur_result_payload; + if (!length || !chunk) return 0; + rctxt->rc_cur_result_payload = + pcl_next_chunk(&rctxt->rc_write_pcl, chunk); + if (length > chunk->ch_length) + return -E2BIG; - /* XXX: Just one READ payload slot for now, since our - * transport implementation currently supports only one - * Write chunk. - */ - rctxt->rc_read_payload_offset = offset; - rctxt->rc_read_payload_length = length; + chunk->ch_position = offset; + chunk->ch_payload_length = length; if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length)) return -EMSGSIZE; rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); - ret = svc_rdma_send_write_chunk(rdma, rctxt->rc_write_list, &subbuf); + ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf); if (ret < 0) return ret; return 0; From 6911f3e10cd9792ccfd6980da91a171f54984692 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 11:50:34 -0400 Subject: [PATCH 013/131] svcrdma: Use parsed chunk lists to encode Reply transport headers Refactor: Instead of re-parsing the ingress RPC Call transport header when constructing the egress RPC Reply transport header, use the new parsed Write list and Reply chunk, which are version- agnostic and already XDR decoded. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 37 ++++++++- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 105 +++++++++++--------------- 2 files changed, 80 insertions(+), 62 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 72b941aef43b..5218e0f9596a 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1447,9 +1447,44 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event, TP_ARGS(handle, length, offset)) DEFINE_SEGMENT_EVENT(send_rseg); -DEFINE_SEGMENT_EVENT(encode_wseg); DEFINE_SEGMENT_EVENT(send_wseg); +TRACE_EVENT(svcrdma_encode_wseg, + TP_PROTO( + const struct svc_rdma_send_ctxt *ctxt, + u32 segno, + u32 handle, + u32 length, + u64 offset + ), + + TP_ARGS(ctxt, segno, handle, length, offset), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(u32, segno) + __field(u32, handle) + __field(u32, length) + __field(u64, offset) + ), + + TP_fast_assign( + __entry->cq_id = ctxt->sc_cid.ci_queue_id; + __entry->completion_id = ctxt->sc_cid.ci_completion_id; + __entry->segno = segno; + __entry->handle = handle; + __entry->length = length; + __entry->offset = offset; + ), + + TP_printk("cq_id=%u cid=%d segno=%u %u@0x%016llx:0x%08x", + __entry->cq_id, __entry->completion_id, + __entry->segno, __entry->length, + (unsigned long long)__entry->offset, __entry->handle + ) +); + TRACE_EVENT(svcrdma_decode_rseg, TP_PROTO( const struct rpc_rdma_cid *cid, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9a6d3dee69ed..1fdbbad3f7dc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -358,49 +358,42 @@ static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt) /** * svc_rdma_encode_write_segment - Encode one Write segment - * @src: matching Write chunk in the RPC Call header * @sctxt: Send context for the RPC Reply + * @chunk: Write chunk to push * @remaining: remaining bytes of the payload left in the Write chunk + * @segno: which segment in the chunk * * Return values: * On success, returns length in bytes of the Reply XDR buffer - * that was consumed by the Write segment + * that was consumed by the Write segment, and updates @remaining * %-EMSGSIZE on XDR buffer overflow */ -static ssize_t svc_rdma_encode_write_segment(__be32 *src, - struct svc_rdma_send_ctxt *sctxt, - unsigned int *remaining) +static ssize_t svc_rdma_encode_write_segment(struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_chunk *chunk, + u32 *remaining, unsigned int segno) { + const struct svc_rdma_segment *segment = &chunk->ch_segments[segno]; + const size_t len = rpcrdma_segment_maxsz * sizeof(__be32); + u32 length; __be32 *p; - const size_t len = rpcrdma_segment_maxsz * sizeof(*p); - u32 handle, length; - u64 offset; p = xdr_reserve_space(&sctxt->sc_stream, len); if (!p) return -EMSGSIZE; - xdr_decode_rdma_segment(src, &handle, &length, &offset); - - if (*remaining < length) { - /* segment only partly filled */ - length = *remaining; - *remaining = 0; - } else { - /* entire segment was consumed */ - *remaining -= length; - } - xdr_encode_rdma_segment(p, handle, length, offset); - - trace_svcrdma_encode_wseg(handle, length, offset); + length = min_t(u32, *remaining, segment->rs_length); + *remaining -= length; + xdr_encode_rdma_segment(p, segment->rs_handle, length, + segment->rs_offset); + trace_svcrdma_encode_wseg(sctxt, segno, segment->rs_handle, length, + segment->rs_offset); return len; } /** * svc_rdma_encode_write_chunk - Encode one Write chunk - * @src: matching Write chunk in the RPC Call header * @sctxt: Send context for the RPC Reply - * @remaining: size in bytes of the payload in the Write chunk + * @chunk: Write chunk to push * * Copy a Write chunk from the Call transport header to the * Reply transport header. Update each segment's length field @@ -411,33 +404,30 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src, * that was consumed by the Write chunk * %-EMSGSIZE on XDR buffer overflow */ -static ssize_t svc_rdma_encode_write_chunk(__be32 *src, - struct svc_rdma_send_ctxt *sctxt, - unsigned int remaining) +static ssize_t svc_rdma_encode_write_chunk(struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_chunk *chunk) { - unsigned int i, nsegs; + u32 remaining = chunk->ch_payload_length; + unsigned int segno; ssize_t len, ret; - len = 0; trace_svcrdma_encode_write_chunk(remaining); - src++; + len = 0; ret = xdr_stream_encode_item_present(&sctxt->sc_stream); if (ret < 0) - return -EMSGSIZE; + return ret; len += ret; - nsegs = be32_to_cpup(src++); - ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs); + ret = xdr_stream_encode_u32(&sctxt->sc_stream, chunk->ch_segcount); if (ret < 0) - return -EMSGSIZE; + return ret; len += ret; - for (i = nsegs; i; i--) { - ret = svc_rdma_encode_write_segment(src, sctxt, &remaining); + for (segno = 0; segno < chunk->ch_segcount; segno++) { + ret = svc_rdma_encode_write_segment(sctxt, chunk, &remaining, segno); if (ret < 0) - return -EMSGSIZE; - src += rpcrdma_segment_maxsz; + return ret; len += ret; } @@ -449,34 +439,23 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src, * @rctxt: Reply context with information about the RPC Call * @sctxt: Send context for the RPC Reply * - * The client provides a Write chunk list in the Call message. Fill - * in the segments in the first Write chunk in the Reply's transport - * header with the number of bytes consumed in each segment. - * Remaining chunks are returned unused. - * - * Assumptions: - * - Client has provided only one Write chunk - * * Return values: * On success, returns length in bytes of the Reply XDR buffer * that was consumed by the Reply's Write list * %-EMSGSIZE on XDR buffer overflow */ -static ssize_t -svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, - struct svc_rdma_send_ctxt *sctxt) +static ssize_t svc_rdma_encode_write_list(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt) { struct svc_rdma_chunk *chunk; ssize_t len, ret; len = 0; - if (rctxt->rc_write_list) { - chunk = pcl_first_chunk(&rctxt->rc_write_pcl); - ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, - chunk->ch_payload_length); + pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { + ret = svc_rdma_encode_write_chunk(sctxt, chunk); if (ret < 0) return ret; - len = ret; + len += ret; } /* Terminate the Write list */ @@ -493,24 +472,28 @@ svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, * @sctxt: Send context for the RPC Reply * @length: size in bytes of the payload in the Reply chunk * - * Assumptions: - * - Reply can always fit in the client-provided Reply chunk - * * Return values: * On success, returns length in bytes of the Reply XDR buffer * that was consumed by the Reply's Reply chunk * %-EMSGSIZE on XDR buffer overflow + * %-E2BIG if the RPC message is larger than the Reply chunk */ static ssize_t -svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt, +svc_rdma_encode_reply_chunk(struct svc_rdma_recv_ctxt *rctxt, struct svc_rdma_send_ctxt *sctxt, unsigned int length) { - if (!rctxt->rc_reply_chunk) + struct svc_rdma_chunk *chunk; + + if (pcl_is_empty(&rctxt->rc_reply_pcl)) return xdr_stream_encode_item_absent(&sctxt->sc_stream); - return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt, - length); + chunk = pcl_first_chunk(&rctxt->rc_reply_pcl); + if (length > chunk->ch_length) + return -E2BIG; + + chunk->ch_payload_length = length; + return svc_rdma_encode_write_chunk(sctxt, chunk); } static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, @@ -928,7 +911,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) *p++ = *rdma_argp; *p++ = *(rdma_argp + 1); *p++ = rdma->sc_fc_credits; - *p = rctxt->rc_reply_chunk ? rdma_nomsg : rdma_msg; + *p = pcl_is_empty(&rctxt->rc_reply_pcl) ? rdma_msg : rdma_nomsg; if (svc_rdma_encode_read_list(sctxt) < 0) goto err0; From 9d0b09d5ef0c842592a5df3a5b8b59124485ff1b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:11 -0400 Subject: [PATCH 014/131] svcrdma: Support multiple write chunks when pulling up When counting the number of SGEs needed to construct a Send request, do not count result payloads. And, when copying the Reply message into the pull-up buffer, result payloads are not to be copied to the Send buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 + include/trace/events/rpcrdma.h | 20 ++- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 14 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 186 ++++++++++++--------- 5 files changed, 145 insertions(+), 86 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d9148787efff..7090af1a9791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,6 +182,8 @@ extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 5218e0f9596a..afc58accb9cf 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1805,20 +1805,30 @@ TRACE_EVENT(svcrdma_small_wrch_err, TRACE_EVENT(svcrdma_send_pullup, TP_PROTO( - unsigned int len + const struct svc_rdma_send_ctxt *ctxt, + unsigned int msglen ), - TP_ARGS(len), + TP_ARGS(ctxt, msglen), TP_STRUCT__entry( - __field(unsigned int, len) + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned int, hdrlen) + __field(unsigned int, msglen) ), TP_fast_assign( - __entry->len = len; + __entry->cq_id = ctxt->sc_cid.ci_queue_id; + __entry->completion_id = ctxt->sc_cid.ci_completion_id; + __entry->hdrlen = ctxt->sc_hdrbuf.len, + __entry->msglen = msglen; ), - TP_printk("len=%u", __entry->len) + TP_printk("cq_id=%u cid=%d hdr=%u msg=%u (total %u)", + __entry->cq_id, __entry->completion_id, + __entry->hdrlen, __entry->msglen, + __entry->hdrlen + __entry->msglen) ); TRACE_EVENT(svcrdma_send_err, diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 5e7c4ba9e147..63f8be974df2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -74,11 +74,17 @@ void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, */ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst, - struct svc_rdma_send_ctxt *ctxt) + struct svc_rdma_send_ctxt *sctxt) { + struct svc_rdma_recv_ctxt *rctxt; int ret; - ret = svc_rdma_map_reply_msg(rdma, ctxt, NULL, &rqst->rq_snd_buf); + rctxt = svc_rdma_recv_ctxt_get(rdma); + if (!rctxt) + return -EIO; + + ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf); + svc_rdma_recv_ctxt_put(rdma, rctxt); if (ret < 0) return -EIO; @@ -86,8 +92,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, * the rq_buffer before all retransmits are complete. */ get_page(virt_to_page(rqst->rq_buffer)); - ctxt->sc_send_wr.opcode = IB_WR_SEND; - return svc_rdma_send(rdma, ctxt); + sctxt->sc_send_wr.opcode = IB_WR_SEND; + return svc_rdma_send(rdma, sctxt); } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7d44e9d2e7a3..af32c3ad45a6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -194,8 +194,13 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) } } -static struct svc_rdma_recv_ctxt * -svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) +/** + * svc_rdma_recv_ctxt_get - Allocate a recv_ctxt + * @rdma: controlling svcxprt_rdma + * + * Returns a recv_ctxt or (rarely) NULL if none are available. + */ +struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) { struct svc_rdma_recv_ctxt *ctxt; struct llist_node *node; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1fdbbad3f7dc..50f0216fb08c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -531,6 +531,45 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, offset_in_page(base), len); } +struct svc_rdma_pullup_data { + u8 *pd_dest; + unsigned int pd_length; + unsigned int pd_num_sges; +}; + +/** + * svc_rdma_xb_count_sges - Count how many SGEs will be needed + * @xdr: xdr_buf containing portion of an RPC message to transmit + * @data: pointer to arguments + * + * Returns: + * Number of SGEs needed to Send the contents of @xdr inline + */ +static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr, + void *data) +{ + struct svc_rdma_pullup_data *args = data; + unsigned int remaining; + unsigned long offset; + + if (xdr->head[0].iov_len) + ++args->pd_num_sges; + + offset = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining) { + ++args->pd_num_sges; + remaining -= min_t(u32, PAGE_SIZE - offset, remaining); + offset = 0; + } + + if (xdr->tail[0].iov_len) + ++args->pd_num_sges; + + args->pd_length += xdr->len; + return 0; +} + /** * svc_rdma_pull_up_needed - Determine whether to use pull-up * @rdma: controlling transport @@ -539,50 +578,71 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, * @xdr: xdr_buf containing RPC message to transmit * * Returns: - * %true if pull-up must be used - * %false otherwise + * %true if pull-up must be used + * %false otherwise */ -static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *sctxt, +static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma, + const struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr) + const struct xdr_buf *xdr) { - bool write_chunk_present = rctxt && rctxt->rc_write_list; - int elements; + /* Resources needed for the transport header */ + struct svc_rdma_pullup_data args = { + .pd_length = sctxt->sc_hdrbuf.len, + .pd_num_sges = 1, + }; + int ret; - /* For small messages, copying bytes is cheaper than DMA mapping. - */ - if (!write_chunk_present && - sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) + ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + svc_rdma_xb_count_sges, &args); + if (ret < 0) + return false; + + if (args.pd_length < RPCRDMA_PULLUP_THRESH) return true; + return args.pd_num_sges >= rdma->sc_max_send_sges; +} - /* Check whether the xdr_buf has more elements than can - * fit in a single RDMA Send. - */ - /* xdr->head */ - elements = 1; +/** + * svc_rdma_xb_linearize - Copy region of xdr_buf to flat buffer + * @xdr: xdr_buf containing portion of an RPC message to copy + * @data: pointer to arguments + * + * Returns: + * Always zero. + */ +static int svc_rdma_xb_linearize(const struct xdr_buf *xdr, + void *data) +{ + struct svc_rdma_pullup_data *args = data; + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; - /* xdr->pages */ - if (!rctxt || !rctxt->rc_write_list) { - unsigned int remaining; - unsigned long pageoff; - - pageoff = xdr->page_base & ~PAGE_MASK; - remaining = xdr->page_len; - while (remaining) { - ++elements; - remaining -= min_t(u32, PAGE_SIZE - pageoff, - remaining); - pageoff = 0; - } + if (xdr->head[0].iov_len) { + memcpy(args->pd_dest, xdr->head[0].iov_base, xdr->head[0].iov_len); + args->pd_dest += xdr->head[0].iov_len; } - /* xdr->tail */ - if (xdr->tail[0].iov_len) - ++elements; + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + memcpy(args->pd_dest, page_address(*ppages) + pageoff, len); + remaining -= len; + args->pd_dest += len; + pageoff = 0; + ppages++; + } - /* assume 1 SGE is needed for the transport header */ - return elements >= rdma->sc_max_send_sges; + if (xdr->tail[0].iov_len) { + memcpy(args->pd_dest, xdr->tail[0].iov_base, xdr->tail[0].iov_len); + args->pd_dest += xdr->tail[0].iov_len; + } + + args->pd_length += xdr->len; + return 0; } /** @@ -595,54 +655,30 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, * The device is not capable of sending the reply directly. * Assemble the elements of @xdr into the transport header buffer. * - * Returns zero on success, or a negative errno on failure. + * Assumptions: + * pull_up_needed has determined that @xdr will fit in the buffer. + * + * Returns: + * %0 if pull-up was successful + * %-EMSGSIZE if a buffer manipulation problem occurred */ -static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, +static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, const struct xdr_buf *xdr) { - unsigned char *dst, *tailbase; - unsigned int taillen; + struct svc_rdma_pullup_data args = { + .pd_dest = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len, + }; + int ret; - dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len; - memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); - dst += xdr->head[0].iov_len; + ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + svc_rdma_xb_linearize, &args); + if (ret < 0) + return ret; - tailbase = xdr->tail[0].iov_base; - taillen = xdr->tail[0].iov_len; - if (rctxt && rctxt->rc_write_list) { - u32 xdrpad; - - xdrpad = xdr_pad_size(xdr->page_len); - if (taillen && xdrpad) { - tailbase += xdrpad; - taillen -= xdrpad; - } - } else { - unsigned int len, remaining; - unsigned long pageoff; - struct page **ppages; - - ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); - pageoff = xdr->page_base & ~PAGE_MASK; - remaining = xdr->page_len; - while (remaining) { - len = min_t(u32, PAGE_SIZE - pageoff, remaining); - - memcpy(dst, page_address(*ppages) + pageoff, len); - remaining -= len; - dst += len; - pageoff = 0; - ppages++; - } - } - - if (taillen) - memcpy(dst, tailbase, taillen); - - sctxt->sc_sges[0].length += xdr->len; - trace_svcrdma_send_pullup(sctxt->sc_sges[0].length); + sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len + args.pd_length; + trace_svcrdma_send_pullup(sctxt, args.pd_length); return 0; } From 2371bcc056647327445150d6df0502d92ad68439 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:27 -0400 Subject: [PATCH 015/131] svcrdma: Support multiple Write chunks in svc_rdma_map_reply_msg() Refactor: svc_rdma_map_reply_msg() is restructured to DMA map only the parts of rq_res that do not contain a result payload. This change has been tested to confirm that it does not cause a regression in the no Write chunk and single Write chunk cases. Multiple Write chunks have not been tested. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- include/trace/events/rpcrdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 174 +++++++++++++++----------- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7090af1a9791..e09fafba00d7 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,7 +213,7 @@ extern int svc_rdma_send(struct svcxprt_rdma *rdma, extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_recv_ctxt *rctxt, diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index afc58accb9cf..054dedd0280c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1687,6 +1687,7 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class, TP_ARGS(rdma, dma_addr, length)) DEFINE_SVC_DMA_EVENT(dma_map_page); +DEFINE_SVC_DMA_EVENT(dma_map_err); DEFINE_SVC_DMA_EVENT(dma_unmap_page); TRACE_EVENT(svcrdma_dma_map_rw_err, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 50f0216fb08c..78fa57ce424f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -496,39 +496,111 @@ svc_rdma_encode_reply_chunk(struct svc_rdma_recv_ctxt *rctxt, return svc_rdma_encode_write_chunk(sctxt, chunk); } -static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - struct page *page, - unsigned long offset, - unsigned int len) +struct svc_rdma_map_data { + struct svcxprt_rdma *md_rdma; + struct svc_rdma_send_ctxt *md_ctxt; +}; + +/** + * svc_rdma_page_dma_map - DMA map one page + * @data: pointer to arguments + * @page: struct page to DMA map + * @offset: offset into the page + * @len: number of bytes to map + * + * Returns: + * %0 if DMA mapping was successful + * %-EIO if the page cannot be DMA mapped + */ +static int svc_rdma_page_dma_map(void *data, struct page *page, + unsigned long offset, unsigned int len) { + struct svc_rdma_map_data *args = data; + struct svcxprt_rdma *rdma = args->md_rdma; + struct svc_rdma_send_ctxt *ctxt = args->md_ctxt; struct ib_device *dev = rdma->sc_cm_id->device; dma_addr_t dma_addr; + ++ctxt->sc_cur_sge_no; + dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE); - trace_svcrdma_dma_map_page(rdma, dma_addr, len); if (ib_dma_mapping_error(dev, dma_addr)) goto out_maperr; + trace_svcrdma_dma_map_page(rdma, dma_addr, len); ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr; ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len; ctxt->sc_send_wr.num_sge++; return 0; out_maperr: + trace_svcrdma_dma_map_err(rdma, dma_addr, len); return -EIO; } -/* ib_dma_map_page() is used here because svc_rdma_dma_unmap() +/** + * svc_rdma_iov_dma_map - DMA map an iovec + * @data: pointer to arguments + * @iov: kvec to DMA map + * + * ib_dma_map_page() is used here because svc_rdma_dma_unmap() * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively. + * + * Returns: + * %0 if DMA mapping was successful + * %-EIO if the iovec cannot be DMA mapped */ -static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - unsigned char *base, - unsigned int len) +static int svc_rdma_iov_dma_map(void *data, const struct kvec *iov) { - return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base), - offset_in_page(base), len); + if (!iov->iov_len) + return 0; + return svc_rdma_page_dma_map(data, virt_to_page(iov->iov_base), + offset_in_page(iov->iov_base), + iov->iov_len); +} + +/** + * svc_rdma_xb_dma_map - DMA map all segments of an xdr_buf + * @xdr: xdr_buf containing portion of an RPC message to transmit + * @data: pointer to arguments + * + * Returns: + * %0 if DMA mapping was successful + * %-EIO if DMA mapping failed + * + * On failure, any DMA mappings that have been already done must be + * unmapped by the caller. + */ +static int svc_rdma_xb_dma_map(const struct xdr_buf *xdr, void *data) +{ + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; + int ret; + + ret = svc_rdma_iov_dma_map(data, &xdr->head[0]); + if (ret < 0) + return ret; + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + + ret = svc_rdma_page_dma_map(data, *ppages++, pageoff, len); + if (ret < 0) + return ret; + + remaining -= len; + pageoff = 0; + } + + ret = svc_rdma_iov_dma_map(data, &xdr->tail[0]); + if (ret < 0) + return ret; + + return xdr->len; } struct svc_rdma_pullup_data { @@ -688,22 +760,22 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma, * @rctxt: Write and Reply chunks provided by client * @xdr: prepared xdr_buf containing RPC message * - * Load the xdr_buf into the ctxt's sge array, and DMA map each - * element as it is added. The Send WR's num_sge field is set. + * Returns: + * %0 if DMA mapping was successful. + * %-EMSGSIZE if a buffer manipulation problem occurred + * %-EIO if DMA mapping failed * - * Returns zero on success, or a negative errno on failure. + * The Send WR's num_sge field is set in all cases. */ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr) + const struct xdr_buf *xdr) { - unsigned int len, remaining; - unsigned long page_off; - struct page **ppages; - unsigned char *base; - u32 xdr_pad; - int ret; + struct svc_rdma_map_data args = { + .md_rdma = rdma, + .md_ctxt = sctxt, + }; /* Set up the (persistently-mapped) transport header SGE. */ sctxt->sc_send_wr.num_sge = 1; @@ -712,7 +784,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, /* If there is a Reply chunk, nothing follows the transport * header, and we're done here. */ - if (rctxt && rctxt->rc_reply_chunk) + if (!pcl_is_empty(&rctxt->rc_reply_pcl)) return 0; /* For pull-up, svc_rdma_send() will sync the transport header. @@ -721,58 +793,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr)) return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr); - ++sctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_buf(rdma, sctxt, - xdr->head[0].iov_base, - xdr->head[0].iov_len); - if (ret < 0) - return ret; - - /* If a Write chunk is present, the xdr_buf's page list - * is not included inline. However the Upper Layer may - * have added XDR padding in the tail buffer, and that - * should not be included inline. - */ - if (rctxt && rctxt->rc_write_list) { - base = xdr->tail[0].iov_base; - len = xdr->tail[0].iov_len; - xdr_pad = xdr_pad_size(xdr->page_len); - - if (len && xdr_pad) { - base += xdr_pad; - len -= xdr_pad; - } - - goto tail; - } - - ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); - page_off = xdr->page_base & ~PAGE_MASK; - remaining = xdr->page_len; - while (remaining) { - len = min_t(u32, PAGE_SIZE - page_off, remaining); - - ++sctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++, - page_off, len); - if (ret < 0) - return ret; - - remaining -= len; - page_off = 0; - } - - base = xdr->tail[0].iov_base; - len = xdr->tail[0].iov_len; -tail: - if (len) { - ++sctxt->sc_cur_sge_no; - ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len); - if (ret < 0) - return ret; - } - - return 0; + return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + svc_rdma_xb_dma_map, &args); } /* The svc_rqst and all resources it owns are released as soon as From 41bc163ffe0fe67cba3fff2f5e8c58caa9e46a1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:28 -0400 Subject: [PATCH 016/131] svcrdma: Support multiple Write chunks in svc_rdma_send_reply_chunk Refactor svc_rdma_send_reply_chunk() so that it Sends only the parts of rq_res that do not contain a result payload. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_rw.c | 36 ++++++++----------------------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e09fafba00d7..85fbec47d4b5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -200,7 +200,7 @@ extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index ed7def7b801b..4a0ece9aa385 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -537,7 +537,7 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info, /** * svc_rdma_xb_write - Construct RDMA Writes to write an xdr_buf * @xdr: xdr_buf to write - * @info: pointer to write arguments + * @data: pointer to write arguments * * Returns: * On succes, returns zero @@ -545,9 +545,9 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info, * %-ENOMEM if a resource has been exhausted * %-EIO if an rdma-rw error occurred */ -static int svc_rdma_xb_write(const struct xdr_buf *xdr, - struct svc_rdma_write_info *info) +static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) { + struct svc_rdma_write_info *info = data; int ret; if (xdr->head[0].iov_len) { @@ -627,11 +627,11 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, */ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr) + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk *chunk; - int consumed, ret; + int ret; if (pcl_is_empty(&rctxt->rc_reply_pcl)) return 0; @@ -641,35 +641,17 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, if (!info) return -ENOMEM; - ret = svc_rdma_iov_write(info, &xdr->head[0]); + ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, + svc_rdma_xb_write, info); if (ret < 0) goto out_err; - consumed = xdr->head[0].iov_len; - - /* Send the page list in the Reply chunk only if the - * client did not provide Write chunks. - */ - if (pcl_is_empty(&rctxt->rc_write_pcl) && xdr->page_len) { - ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len, - xdr->page_len); - if (ret < 0) - goto out_err; - consumed += xdr->page_len; - } - - if (xdr->tail[0].iov_len) { - ret = svc_rdma_iov_write(info, &xdr->tail[0]); - if (ret < 0) - goto out_err; - consumed += xdr->tail[0].iov_len; - } ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); if (ret < 0) goto out_err; - trace_svcrdma_send_reply_chunk(consumed); - return consumed; + trace_svcrdma_send_reply_chunk(xdr->len); + return xdr->len; out_err: svc_rdma_write_info_free(info); From 7954c8503b8709660d93505a40f1847634d9c3ba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 15:19:36 -0400 Subject: [PATCH 017/131] svcrdma: Remove chunk list pointers Clean up: These pointers are no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ---- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 8 +------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 85fbec47d4b5..6f247d043731 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -149,12 +149,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_pcl rc_call_pcl; struct svc_rdma_pcl rc_read_pcl; - - __be32 *rc_write_list; struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; - - __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; struct page *rc_pages[RPCSVC_MAXPAGES]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index af32c3ad45a6..dd10b1de227d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -540,17 +540,13 @@ static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; - - rctxt->rc_write_list = NULL; if (!xdr_count_write_chunks(rctxt, p)) return false; if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p)) return false; - if (!pcl_is_empty(&rctxt->rc_write_pcl)) - rctxt->rc_write_list = p; rctxt->rc_cur_result_payload = pcl_first_chunk(&rctxt->rc_write_pcl); - return rctxt->rc_write_pcl.cl_count < 2; + return true; } /* Sanity check the Reply chunk. @@ -573,13 +569,11 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) if (!p) return false; - rctxt->rc_reply_chunk = NULL; if (!xdr_item_is_present(p)) return true; if (!xdr_check_write_chunk(rctxt)) return false; - rctxt->rc_reply_chunk = p; rctxt->rc_reply_pcl.cl_count = 1; return pcl_alloc_write(rctxt, &rctxt->rc_reply_pcl, p); } From b704be09dccf00b14e0b22a4e849e3ce7a10acd2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Jun 2020 13:28:28 -0400 Subject: [PATCH 018/131] svcrdma: Clean up chunk tracepoints We already have trace_svcrdma_decode_rseg(), which records each ingress Read segment. Instead of reporting those again when they are about to be posted as RDMA Reads, let's fire one tracepoint before posting each type of chunk. So we'll get: nfsd-1998 [002] 321.666615: svcrdma_decode_rseg: cq.id=4 cid=42 segno=0 position=0 192@0x013ca9ebfae14000:0xb0010b05 nfsd-1998 [002] 321.666615: svcrdma_decode_rseg: cq.id=4 cid=42 segno=1 position=0 7688@0x013ca9ebf914e000:0xb0010a05 nfsd-1998 [002] 321.666615: svcrdma_decode_rseg: cq.id=4 cid=42 segno=2 position=0 28@0x013ca9ebfae15000:0xb0010905 nfsd-1998 [002] 321.666622: svcrdma_decode_rqst: cq.id=4 cid=42 xid=0x013ca9eb vers=1 credits=128 proc=RDMA_NOMSG hdrlen=100 nfsd-1998 [002] 321.666642: svcrdma_post_read_chunk: cq.id=3 cid=112 sqecount=3 kworker/2:1H-221 [002] 321.673949: svcrdma_wc_read: cq.id=3 cid=112 status=SUCCESS (0/0x0) Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 110 ++++---------------------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 27 +++---- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 - 3 files changed, 26 insertions(+), 113 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 054dedd0280c..896aafc37b09 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1410,45 +1410,6 @@ DEFINE_BADREQ_EVENT(drop); DEFINE_BADREQ_EVENT(badproc); DEFINE_BADREQ_EVENT(parse); -DECLARE_EVENT_CLASS(svcrdma_segment_event, - TP_PROTO( - u32 handle, - u32 length, - u64 offset - ), - - TP_ARGS(handle, length, offset), - - TP_STRUCT__entry( - __field(u32, handle) - __field(u32, length) - __field(u64, offset) - ), - - TP_fast_assign( - __entry->handle = handle; - __entry->length = length; - __entry->offset = offset; - ), - - TP_printk("%u@0x%016llx:0x%08x", - __entry->length, (unsigned long long)__entry->offset, - __entry->handle - ) -); - -#define DEFINE_SEGMENT_EVENT(name) \ - DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\ - TP_PROTO( \ - u32 handle, \ - u32 length, \ - u64 offset \ - ), \ - TP_ARGS(handle, length, offset)) - -DEFINE_SEGMENT_EVENT(send_rseg); -DEFINE_SEGMENT_EVENT(send_wseg); - TRACE_EVENT(svcrdma_encode_wseg, TP_PROTO( const struct svc_rdma_send_ctxt *ctxt, @@ -1558,62 +1519,6 @@ TRACE_EVENT(svcrdma_decode_wseg, ) ); -DECLARE_EVENT_CLASS(svcrdma_chunk_event, - TP_PROTO( - u32 length - ), - - TP_ARGS(length), - - TP_STRUCT__entry( - __field(u32, length) - ), - - TP_fast_assign( - __entry->length = length; - ), - - TP_printk("length=%u", - __entry->length - ) -); - -#define DEFINE_CHUNK_EVENT(name) \ - DEFINE_EVENT(svcrdma_chunk_event, svcrdma_##name, \ - TP_PROTO( \ - u32 length \ - ), \ - TP_ARGS(length)) - -DEFINE_CHUNK_EVENT(send_pzr); -DEFINE_CHUNK_EVENT(encode_write_chunk); -DEFINE_CHUNK_EVENT(send_write_chunk); -DEFINE_CHUNK_EVENT(encode_read_chunk); -DEFINE_CHUNK_EVENT(send_reply_chunk); - -TRACE_EVENT(svcrdma_send_read_chunk, - TP_PROTO( - u32 length, - u32 position - ), - - TP_ARGS(length, position), - - TP_STRUCT__entry( - __field(u32, length) - __field(u32, position) - ), - - TP_fast_assign( - __entry->length = length; - __entry->position = position; - ), - - TP_printk("length=%u position=%u", - __entry->length, __entry->position - ) -); - DECLARE_EVENT_CLASS(svcrdma_error_event, TP_PROTO( __be32 xid @@ -1936,7 +1841,7 @@ TRACE_EVENT(svcrdma_rq_post_err, ) ); -TRACE_EVENT(svcrdma_post_chunk, +DECLARE_EVENT_CLASS(svcrdma_post_chunk_class, TP_PROTO( const struct rpc_rdma_cid *cid, int sqecount @@ -1962,6 +1867,19 @@ TRACE_EVENT(svcrdma_post_chunk, ) ); +#define DEFINE_POST_CHUNK_EVENT(name) \ + DEFINE_EVENT(svcrdma_post_chunk_class, \ + svcrdma_post_##name##_chunk, \ + TP_PROTO( \ + const struct rpc_rdma_cid *cid, \ + int sqecount \ + ), \ + TP_ARGS(cid, sqecount)) + +DEFINE_POST_CHUNK_EVENT(read); +DEFINE_POST_CHUNK_EVENT(write); +DEFINE_POST_CHUNK_EVENT(reply); + DEFINE_COMPLETION_EVENT(svcrdma_wc_read); DEFINE_COMPLETION_EVENT(svcrdma_wc_write); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 4a0ece9aa385..b04c700862e9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -358,7 +358,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { - trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); if (ret) break; @@ -470,8 +469,6 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, if (ret < 0) return -EIO; - trace_svcrdma_send_wseg(seg->rs_handle, write_len, offset); - list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; if (write_len == seg->rs_length - info->wi_seg_off) { @@ -590,21 +587,22 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; + struct svc_rdma_chunk_ctxt *cc; int ret; info = svc_rdma_write_info_alloc(rdma, chunk); if (!info) return -ENOMEM; + cc = &info->wi_cc; ret = svc_rdma_xb_write(xdr, info); if (ret != xdr->len) goto out_err; - ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); + trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(cc); if (ret < 0) goto out_err; - - trace_svcrdma_send_write_chunk(xdr->page_len); return xdr->len; out_err: @@ -630,6 +628,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; + struct svc_rdma_chunk_ctxt *cc; struct svc_rdma_chunk *chunk; int ret; @@ -640,17 +639,18 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, info = svc_rdma_write_info_alloc(rdma, chunk); if (!info) return -ENOMEM; + cc = &info->wi_cc; ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, svc_rdma_xb_write, info); if (ret < 0) goto out_err; - ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); + trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(cc); if (ret < 0) goto out_err; - trace_svcrdma_send_reply_chunk(xdr->len); return xdr->len; out_err: @@ -737,10 +737,8 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, if (ret < 0) break; - trace_svcrdma_send_rseg(handle, length, offset); info->ri_chunklen += length; } - return ret; } @@ -762,8 +760,6 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - trace_svcrdma_send_read_chunk(info->ri_chunklen, info->ri_position); - head->rc_hdr_count = 0; /* Split the Receive buffer between the head and tail @@ -818,8 +814,6 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - trace_svcrdma_send_pzr(info->ri_chunklen); - head->rc_arg.len += info->ri_chunklen; head->rc_arg.buflen += info->ri_chunklen; @@ -876,6 +870,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p) { struct svc_rdma_read_info *info; + struct svc_rdma_chunk_ctxt *cc; int ret; /* The request (with page list) is constructed in @@ -893,6 +888,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, info = svc_rdma_read_info_alloc(rdma); if (!info) return -ENOMEM; + cc = &info->ri_cc; info->ri_readctxt = head; info->ri_pageno = 0; info->ri_pageoff = 0; @@ -905,7 +901,8 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, if (ret < 0) goto out_err; - ret = svc_rdma_post_chunk_ctxt(&info->ri_cc); + trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(cc); if (ret < 0) goto out_err; svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 78fa57ce424f..68af79d4f04f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -411,8 +411,6 @@ static ssize_t svc_rdma_encode_write_chunk(struct svc_rdma_send_ctxt *sctxt, unsigned int segno; ssize_t len, ret; - trace_svcrdma_encode_write_chunk(remaining); - len = 0; ret = xdr_stream_encode_item_present(&sctxt->sc_stream); if (ret < 0) From bafe9c27d537e7bcfacb227413bdaff2dce53d09 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 17 Jul 2020 15:05:51 -0400 Subject: [PATCH 019/131] svcrdma: Rename info::ri_chunklen I'm about to change the purpose of ri_chunklen: Instead of tracking the number of bytes in one Read chunk, it will track the total number of bytes in the Read list. Rename it for clarity. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index b04c700862e9..3154c7ab1ca8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -262,7 +262,7 @@ struct svc_rdma_read_info { unsigned int ri_position; unsigned int ri_pageno; unsigned int ri_pageoff; - unsigned int ri_chunklen; + unsigned int ri_totalbytes; struct svc_rdma_chunk_ctxt ri_cc; }; @@ -726,7 +726,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, int ret; ret = -EINVAL; - info->ri_chunklen = 0; while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { u32 handle, length; u64 offset; @@ -737,7 +736,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, if (ret < 0) break; - info->ri_chunklen += length; + info->ri_totalbytes += length; } return ret; } @@ -754,6 +753,8 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, __be32 *p) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; + struct xdr_buf *buf = &head->rc_arg; + unsigned int length; int ret; ret = svc_rdma_build_read_chunk(rqstp, info, p); @@ -782,11 +783,10 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, * Currently these chunks always start at page offset 0, * thus the rounded-up length never crosses a page boundary. */ - info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2; - - head->rc_arg.page_len = info->ri_chunklen; - head->rc_arg.len += info->ri_chunklen; - head->rc_arg.buflen += info->ri_chunklen; + length = XDR_QUADLEN(info->ri_totalbytes) << 2; + buf->page_len = length; + buf->len += length; + buf->buflen += length; out: return ret; @@ -808,22 +808,20 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp, __be32 *p) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; + struct xdr_buf *buf = &head->rc_arg; int ret; ret = svc_rdma_build_read_chunk(rqstp, info, p); if (ret < 0) goto out; - head->rc_arg.len += info->ri_chunklen; - head->rc_arg.buflen += info->ri_chunklen; + buf->len += info->ri_totalbytes; + buf->buflen += info->ri_totalbytes; head->rc_hdr_count = 1; - head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]); - head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE, - info->ri_chunklen); - - head->rc_arg.page_len = info->ri_chunklen - - head->rc_arg.head[0].iov_len; + buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; out: return ret; @@ -892,6 +890,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, info->ri_readctxt = head; info->ri_pageno = 0; info->ri_pageoff = 0; + info->ri_totalbytes = 0; info->ri_position = be32_to_cpup(p + 1); if (info->ri_position) From d96962e6d0e281bab6a48e83b42f5dce6eb28bf4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Sep 2020 13:04:17 -0400 Subject: [PATCH 020/131] svcrdma: Use the new parsed chunk list when pulling Read chunks As a pre-requisite for handling multiple Read chunks in each Read list, convert svc_rdma_recv_read_chunk() to use the new parsed Read chunk list. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 6 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 16 ++- net/sunrpc/xprtrdma/svc_rdma_rw.c | 160 +++++++++++++++--------- 3 files changed, 111 insertions(+), 71 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 6f247d043731..294b56e61522 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -188,15 +188,15 @@ extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, - struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_chunk *chunk, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index dd10b1de227d..cbdb71247755 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -824,7 +824,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svcxprt_rdma *rdma_xprt = container_of(xprt, struct svcxprt_rdma, sc_xprt); struct svc_rdma_recv_ctxt *ctxt; - __be32 *p; int ret; rqstp->rq_xprt_ctxt = NULL; @@ -857,7 +856,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = rqstp->rq_pages; rqstp->rq_next_page = rqstp->rq_respages; - p = (__be32 *)rqstp->rq_arg.head[0].iov_base; ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); if (ret < 0) goto out_err; @@ -870,9 +868,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_get_inv_rkey(rdma_xprt, ctxt); - p += rpcrdma_fixed_maxsz; - if (*p != xdr_zero) - goto out_readchunk; + if (!pcl_is_empty(&ctxt->rc_read_pcl) || + !pcl_is_empty(&ctxt->rc_call_pcl)) + goto out_readlist; complete: rqstp->rq_xprt_ctxt = ctxt; @@ -880,10 +878,10 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_xprt_copy_addrs(rqstp, xprt); return rqstp->rq_arg.len; -out_readchunk: - ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p); +out_readlist: + ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); if (ret < 0) - goto out_postfail; + goto out_readfail; return 0; out_err: @@ -891,7 +889,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; -out_postfail: +out_readfail: if (ret == -EINVAL) svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 3154c7ab1ca8..2d454e37b2fe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -258,8 +258,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) /* State for pulling a Read chunk. */ struct svc_rdma_read_info { + struct svc_rqst *ri_rqst; struct svc_rdma_recv_ctxt *ri_readctxt; - unsigned int ri_position; unsigned int ri_pageno; unsigned int ri_pageoff; unsigned int ri_totalbytes; @@ -658,17 +658,29 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, return ret; } +/** + * svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment + * @info: context for ongoing I/O + * @segment: co-ordinates of remote memory to be read + * + * Returns: + * %0: the Read WR chain was constructed successfully + * %-EINVAL: there were not enough rq_pages to finish + * %-ENOMEM: allocating a local resources failed + * %-EIO: a DMA mapping error occurred + */ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, - struct svc_rqst *rqstp, - u32 rkey, u32 len, u64 offset) + const struct svc_rdma_segment *segment) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; + struct svc_rqst *rqstp = info->ri_rqst; struct svc_rdma_rw_ctxt *ctxt; - unsigned int sge_no, seg_len; + unsigned int sge_no, seg_len, len; struct scatterlist *sg; int ret; + len = segment->rs_length; sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); if (!ctxt) @@ -702,8 +714,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, goto out_overrun; } - ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey, - DMA_FROM_DEVICE); + ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset, + segment->rs_handle, DMA_FROM_DEVICE); if (ret < 0) return -EIO; @@ -716,48 +728,63 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, return -EINVAL; } -/* Walk the segments in the Read chunk starting at @p and construct - * RDMA Read operations to pull the chunk to the server. +/** + * svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk + * @info: context for ongoing I/O + * @chunk: Read chunk to pull + * + * Return values: + * %0: the Read WR chain was constructed successfully + * %-EINVAL: there were not enough resources to finish + * %-ENOMEM: allocating a local resources failed + * %-EIO: a DMA mapping error occurred */ -static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info, - __be32 *p) +static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, + const struct svc_rdma_chunk *chunk) { + const struct svc_rdma_segment *segment; int ret; ret = -EINVAL; - while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { - u32 handle, length; - u64 offset; - - p = xdr_decode_rdma_segment(p, &handle, &length, &offset); - ret = svc_rdma_build_read_segment(info, rqstp, handle, length, - offset); + pcl_for_each_segment(segment, chunk) { + ret = svc_rdma_build_read_segment(info, segment); if (ret < 0) break; - - info->ri_totalbytes += length; + info->ri_totalbytes += segment->rs_length; } return ret; } -/* Construct RDMA Reads to pull over a normal Read chunk. The chunk - * data lands in the page list of head->rc_arg.pages. +/** + * svc_rdma_read_data_items - Construct RDMA Reads to pull data item Read chunks + * @info: context for RDMA Reads + * + * The chunk data lands in the page list of head->rc_arg.pages. * * Currently NFSD does not look at the head->rc_arg.tail[0] iovec. * Therefore, XDR round-up of the Read chunk and trailing * inline content must both be added at the end of the pagelist. + * + * Return values: + * %0: RDMA Read WQEs were successfully built + * %-EINVAL: client provided too many chunks or segments, + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info, - __be32 *p) +static int svc_rdma_read_data_items(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct xdr_buf *buf = &head->rc_arg; + struct svc_rdma_chunk *chunk; unsigned int length; int ret; - ret = svc_rdma_build_read_chunk(rqstp, info, p); + if (head->rc_read_pcl.cl_count > 1) + return -EINVAL; + + chunk = pcl_first_chunk(&head->rc_read_pcl); + ret = svc_rdma_build_read_chunk(info, chunk); if (ret < 0) goto out; @@ -768,11 +795,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, * chunk is not included in either the pagelist or in * the tail. */ - head->rc_arg.tail[0].iov_base = - head->rc_arg.head[0].iov_base + info->ri_position; - head->rc_arg.tail[0].iov_len = - head->rc_arg.head[0].iov_len - info->ri_position; - head->rc_arg.head[0].iov_len = info->ri_position; + buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position; + buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position; + buf->head[0].iov_len = chunk->ch_position; /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). * @@ -792,26 +817,36 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, return ret; } -/* Construct RDMA Reads to pull over a Position Zero Read chunk. - * The start of the data lands in the first page just after - * the Transport header, and the rest lands in the page list of +/** + * svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message + * @info: context for RDMA Reads + * + * The start of the data lands in the first page just after the + * Transport header, and the rest lands in the page list of * head->rc_arg.pages. * * Assumptions: - * - A PZRC has an XDR-aligned length (no implicit round-up). - * - There can be no trailing inline content (IOW, we assume - * a PZRC is never sent in an RDMA_MSG message, though it's - * allowed by spec). + * - A PZRC is never sent in an RDMA_MSG message, though it's + * allowed by spec. + * + * Return values: + * %0: RDMA Read WQEs were successfully built + * %-EINVAL: client provided too many chunks or segments, + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info, - __be32 *p) +static int svc_rdma_read_special(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct xdr_buf *buf = &head->rc_arg; int ret; - ret = svc_rdma_build_read_chunk(rqstp, info, p); + if (head->rc_call_pcl.cl_count > 1) + return -EINVAL; + + ret = svc_rdma_build_read_chunk(info, + pcl_first_chunk(&head->rc_call_pcl)); if (ret < 0) goto out; @@ -848,24 +883,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, } /** - * svc_rdma_recv_read_chunk - Pull a Read chunk from the client + * svc_rdma_process_read_list - Pull list of Read chunks from the client * @rdma: controlling RDMA transport * @rqstp: set of pages to use as Read sink buffers * @head: pages under I/O collect here - * @p: pointer to start of Read chunk * - * Returns: - * %0 if all needed RDMA Reads were posted successfully, - * %-EINVAL if client provided too many segments, - * %-ENOMEM if rdma_rw context pool was exhausted, - * %-ENOTCONN if posting failed (connection is lost), - * %-EIO if rdma_rw initialization failed (DMA mapping, etc). + * The RPC/RDMA protocol assumes that the upper layer's XDR decoders + * pull each Read chunk as they decode an incoming RPC message. * - * Assumptions: - * - All Read segments in @p have the same Position value. + * On Linux, however, the server needs to have a fully-constructed RPC + * message in rqstp->rq_arg when there is a positive return code from + * ->xpo_recvfrom. So the Read list is safety-checked immediately when + * it is received, then here the whole Read list is pulled all at once. + * The ingress RPC message is fully reconstructed once all associated + * RDMA Reads have completed. + * + * Return values: + * %1: all needed RDMA Reads were posted successfully, + * %-EINVAL: client provided too many chunks or segments, + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head, __be32 *p) +int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head) { struct svc_rdma_read_info *info; struct svc_rdma_chunk_ctxt *cc; @@ -887,16 +929,16 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, if (!info) return -ENOMEM; cc = &info->ri_cc; + info->ri_rqst = rqstp; info->ri_readctxt = head; info->ri_pageno = 0; info->ri_pageoff = 0; info->ri_totalbytes = 0; - info->ri_position = be32_to_cpup(p + 1); - if (info->ri_position) - ret = svc_rdma_build_normal_read_chunk(rqstp, info, p); + if (pcl_is_empty(&head->rc_call_pcl)) + ret = svc_rdma_read_data_items(info); else - ret = svc_rdma_build_pz_read_chunk(rqstp, info, p); + ret = svc_rdma_read_special(info); if (ret < 0) goto out_err; @@ -905,7 +947,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, if (ret < 0) goto out_err; svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count); - return 0; + return 1; out_err: svc_rdma_read_info_free(info); From d7cc73972661be4a02a1b09f1d9b3283c6c05154 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 5 Aug 2020 14:59:05 -0400 Subject: [PATCH 021/131] svcrdma: support multiple Read chunks per RPC An efficient way to handle multiple Read chunks is to post them all together and then take a single completion. This is also how the code is already structured: when the Read completion fires, all portions of the incoming RPC message are available to be assembled. The difficult problem is setting up the Read sink buffers so that the server pulls the client's data into place, making subsequent pull-up unnecessary. There are several cases: * No Read chunks. No-op. * One data item Read chunk. This is the fast case, where the inline part of the RPC-over-RDMA message becomes the head and tail, and the data item chunk is placed in buf->pages. * A Position-zero Read chunk. Treated like TCP: the Read chunk is pulled into contiguous pages. + A Position-zero Read chunk with data item chunks. Treated like TCP: all of the Read chunks are pulled into contiguous pages. + Multiple data item chunks. Treated like TCP: the inline part is copied and the data item chunks are pulled into contiguous pages. The "*" cases are already supported. This patch adds support for the "+" cases. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 236 ++++++++++++++++++++++++++++-- 1 file changed, 222 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 2d454e37b2fe..0b63e1321d74 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -756,7 +756,121 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, } /** - * svc_rdma_read_data_items - Construct RDMA Reads to pull data item Read chunks + * svc_rdma_copy_inline_range - Copy part of the inline content into pages + * @info: context for RDMA Reads + * @offset: offset into the Receive buffer of region to copy + * @remaining: length of region to copy + * + * Take a page at a time from rqstp->rq_pages and copy the inline + * content from the Receive buffer into that page. Update + * info->ri_pageno and info->ri_pageoff so that the next RDMA Read + * result will land contiguously with the copied content. + * + * Return values: + * %0: Inline content was successfully copied + * %-EINVAL: offset or length was incorrect + */ +static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, + unsigned int offset, + unsigned int remaining) +{ + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; + unsigned char *dst, *src = head->rc_recv_buf; + struct svc_rqst *rqstp = info->ri_rqst; + unsigned int page_no, numpages; + + numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT; + for (page_no = 0; page_no < numpages; page_no++) { + unsigned int page_len; + + page_len = min_t(unsigned int, remaining, + PAGE_SIZE - info->ri_pageoff); + + head->rc_arg.pages[info->ri_pageno] = + rqstp->rq_pages[info->ri_pageno]; + if (!info->ri_pageoff) + head->rc_page_count++; + + dst = page_address(head->rc_arg.pages[info->ri_pageno]); + memcpy(dst + info->ri_pageno, src + offset, page_len); + + info->ri_totalbytes += page_len; + info->ri_pageoff += page_len; + if (info->ri_pageoff == PAGE_SIZE) { + info->ri_pageno++; + info->ri_pageoff = 0; + } + remaining -= page_len; + offset += page_len; + } + + return -EINVAL; +} + +/** + * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks + * @info: context for RDMA Reads + * + * The chunk data lands in head->rc_arg as a series of contiguous pages, + * like an incoming TCP call. + * + * Return values: + * %0: RDMA Read WQEs were successfully built + * %-EINVAL: client provided too many chunks or segments, + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). + */ +static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info) +{ + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; + const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; + struct svc_rdma_chunk *chunk, *next; + struct xdr_buf *buf = &head->rc_arg; + unsigned int start, length; + int ret; + + start = 0; + chunk = pcl_first_chunk(pcl); + length = chunk->ch_position; + ret = svc_rdma_copy_inline_range(info, start, length); + if (ret < 0) + return ret; + + pcl_for_each_chunk(chunk, pcl) { + ret = svc_rdma_build_read_chunk(info, chunk); + if (ret < 0) + return ret; + + next = pcl_next_chunk(pcl, chunk); + if (!next) + break; + + start += length; + length = next->ch_position - info->ri_totalbytes; + ret = svc_rdma_copy_inline_range(info, start, length); + if (ret < 0) + return ret; + } + + start += length; + length = head->rc_byte_len - start; + ret = svc_rdma_copy_inline_range(info, start, length); + if (ret < 0) + return ret; + + buf->len += info->ri_totalbytes; + buf->buflen += info->ri_totalbytes; + + head->rc_hdr_count = 1; + buf->head[0].iov_base = page_address(head->rc_pages[0]); + buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); + buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; + return 0; +} + +/** + * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks * @info: context for RDMA Reads * * The chunk data lands in the page list of head->rc_arg.pages. @@ -772,7 +886,7 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_read_data_items(struct svc_rdma_read_info *info) +static int svc_rdma_read_data_item(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct xdr_buf *buf = &head->rc_arg; @@ -780,9 +894,6 @@ static int svc_rdma_read_data_items(struct svc_rdma_read_info *info) unsigned int length; int ret; - if (head->rc_read_pcl.cl_count > 1) - return -EINVAL; - chunk = pcl_first_chunk(&head->rc_read_pcl); ret = svc_rdma_build_read_chunk(info, chunk); if (ret < 0) @@ -817,6 +928,104 @@ static int svc_rdma_read_data_items(struct svc_rdma_read_info *info) return ret; } +/** + * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk + * @info: context for RDMA Reads + * @chunk: parsed Call chunk to pull + * @offset: offset of region to pull + * @length: length of region to pull + * + * Return values: + * %0: RDMA Read WQEs were successfully built + * %-EINVAL: there were not enough resources to finish + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). + */ +static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, + const struct svc_rdma_chunk *chunk, + unsigned int offset, unsigned int length) +{ + const struct svc_rdma_segment *segment; + int ret; + + ret = -EINVAL; + pcl_for_each_segment(segment, chunk) { + struct svc_rdma_segment dummy; + + if (offset > segment->rs_length) { + offset -= segment->rs_length; + continue; + } + + dummy.rs_handle = segment->rs_handle; + dummy.rs_length = min_t(u32, length, segment->rs_length) - offset; + dummy.rs_offset = segment->rs_offset + offset; + + ret = svc_rdma_build_read_segment(info, &dummy); + if (ret < 0) + break; + + info->ri_totalbytes += dummy.rs_length; + length -= dummy.rs_length; + offset = 0; + } + return ret; +} + +/** + * svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message + * @info: context for RDMA Reads + * + * Return values: + * %0: RDMA Read WQEs were successfully built + * %-EINVAL: there were not enough resources to finish + * %-ENOMEM: rdma_rw context pool was exhausted, + * %-ENOTCONN: posting failed (connection is lost), + * %-EIO: rdma_rw initialization failed (DMA mapping, etc). + */ +static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) +{ + struct svc_rdma_recv_ctxt *head = info->ri_readctxt; + const struct svc_rdma_chunk *call_chunk = + pcl_first_chunk(&head->rc_call_pcl); + const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; + struct svc_rdma_chunk *chunk, *next; + unsigned int start, length; + int ret; + + if (pcl_is_empty(pcl)) + return svc_rdma_build_read_chunk(info, call_chunk); + + start = 0; + chunk = pcl_first_chunk(pcl); + length = chunk->ch_position; + ret = svc_rdma_read_chunk_range(info, call_chunk, start, length); + if (ret < 0) + return ret; + + pcl_for_each_chunk(chunk, pcl) { + ret = svc_rdma_build_read_chunk(info, chunk); + if (ret < 0) + return ret; + + next = pcl_next_chunk(pcl, chunk); + if (!next) + break; + + start += length; + length = next->ch_position - info->ri_totalbytes; + ret = svc_rdma_read_chunk_range(info, call_chunk, + start, length); + if (ret < 0) + return ret; + } + + start += length; + length = call_chunk->ch_length - start; + return svc_rdma_read_chunk_range(info, call_chunk, start, length); +} + /** * svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message * @info: context for RDMA Reads @@ -836,17 +1045,13 @@ static int svc_rdma_read_data_items(struct svc_rdma_read_info *info) * %-ENOTCONN: posting failed (connection is lost), * %-EIO: rdma_rw initialization failed (DMA mapping, etc). */ -static int svc_rdma_read_special(struct svc_rdma_read_info *info) +static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) { struct svc_rdma_recv_ctxt *head = info->ri_readctxt; struct xdr_buf *buf = &head->rc_arg; int ret; - if (head->rc_call_pcl.cl_count > 1) - return -EINVAL; - - ret = svc_rdma_build_read_chunk(info, - pcl_first_chunk(&head->rc_call_pcl)); + ret = svc_rdma_read_call_chunk(info); if (ret < 0) goto out; @@ -935,9 +1140,12 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, info->ri_pageoff = 0; info->ri_totalbytes = 0; - if (pcl_is_empty(&head->rc_call_pcl)) - ret = svc_rdma_read_data_items(info); - else + if (pcl_is_empty(&head->rc_call_pcl)) { + if (head->rc_read_pcl.cl_count == 1) + ret = svc_rdma_read_data_item(info); + else + ret = svc_rdma_read_multiple_chunks(info); + } else ret = svc_rdma_read_special(info); if (ret < 0) goto out_err; From 25fef48bdbe7cac5ba5577eab6a750e1caea43bc Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 1 Nov 2020 07:32:34 -0800 Subject: [PATCH 022/131] NFSD: A semicolon is not needed after a switch statement. Signed-off-by: Tom Rix Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1d2c33cd3a87..e3c6bea83bd6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2558,7 +2558,7 @@ static u32 nfs4_file_type(umode_t mode) case S_IFREG: return NF4REG; case S_IFSOCK: return NF4SOCK; default: return NF4BAD; - }; + } } static inline __be32 From 71fd721839a74d945c242299f6be29a246fc2131 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 6 Nov 2020 13:40:57 +0800 Subject: [PATCH 023/131] nfsd/nfs3: remove unused macro nfsd3_fhandleres The macro is unused, remove it to tame gcc warning: fs/nfsd/nfs3proc.c:702:0: warning: macro "nfsd3_fhandleres" is not used [-Wunused-macros] Signed-off-by: Alex Shi Cc: "J. Bruce Fields" Cc: Chuck Lever Cc: linux-nfs@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a633044b0dc1..d9be589fed15 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -689,7 +689,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) #define nfsd3_mkdirargs nfsd3_createargs #define nfsd3_readdirplusargs nfsd3_readdirargs #define nfsd3_fhandleargs nfsd_fhandle -#define nfsd3_fhandleres nfsd3_attrstat #define nfsd3_attrstatres nfsd3_attrstat #define nfsd3_wccstatres nfsd3_attrstat #define nfsd3_createres nfsd3_diropres From 156708adf2d9b8e5227bcb370811dbfd471371d9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jul 2020 14:39:00 -0400 Subject: [PATCH 024/131] SUNRPC: Move the svc_xdr_recvfrom() tracepoint Commit c509f15a5801 ("SUNRPC: Split the xdr_buf event class") added display of the rqst's XID to the svc_xdr_buf_class. However, when the recvfrom tracepoint fires, rq_xid has yet to be filled in with the current XID. So it ends up recording the previous XID that was handled by that svc_rqst. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 24 ------------------------ net/sunrpc/svc_xprt.c | 4 +--- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2a03263b5f9d..58994e013022 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1500,30 +1500,6 @@ SVC_RQST_FLAG_LIST #define show_rqstp_flags(flags) \ __print_flags(flags, "|", SVC_RQST_FLAG_LIST) -TRACE_EVENT(svc_recv, - TP_PROTO(struct svc_rqst *rqst, int len), - - TP_ARGS(rqst, len), - - TP_STRUCT__entry( - __field(u32, xid) - __field(int, len) - __field(unsigned long, flags) - __string(addr, rqst->rq_xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->len = len; - __entry->flags = rqst->rq_flags; - __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); - ), - - TP_printk("addr=%s xid=0x%08x len=%d flags=%s", - __get_str(addr), __entry->xid, __entry->len, - show_rqstp_flags(__entry->flags)) -); - TRACE_DEFINE_ENUM(SVC_GARBAGE); TRACE_DEFINE_ENUM(SVC_SYSERR); TRACE_DEFINE_ENUM(SVC_VALID); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 43cf8dbde898..5fb9164aa690 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -813,8 +813,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) len = svc_deferred_recv(rqstp); else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - if (len > 0) - trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg); rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); @@ -868,7 +866,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; - trace_svc_recv(rqstp, len); + trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg); return len; out_release: rqstp->rq_res.len = 0; From b76278ae68848cea13b325d247aa5cf31c87edac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Aug 2020 12:56:40 -0400 Subject: [PATCH 025/131] NFSD: Clean up the show_nf_may macro Display all currently possible NFSD_MAY permission flags. Move and rename show_nf_may with a more generic name because the NFSD_MAY permission flags are used in other places besides the file cache. Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 99bf07800cd0..f1bc55c108b5 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -12,6 +12,22 @@ #include "export.h" #include "nfsfh.h" +#define show_nfsd_may_flags(x) \ + __print_flags(x, "|", \ + { NFSD_MAY_EXEC, "EXEC" }, \ + { NFSD_MAY_WRITE, "WRITE" }, \ + { NFSD_MAY_READ, "READ" }, \ + { NFSD_MAY_SATTR, "SATTR" }, \ + { NFSD_MAY_TRUNC, "TRUNC" }, \ + { NFSD_MAY_LOCK, "LOCK" }, \ + { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \ + { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \ + { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \ + { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ + { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ + { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) + TRACE_EVENT(nfsd_compound, TP_PROTO(const struct svc_rqst *rqst, u32 args_opcnt), @@ -421,6 +437,9 @@ TRACE_EVENT(nfsd_clid_inuse_err, __entry->cl_boot, __entry->cl_id) ) +/* + * from fs/nfsd/filecache.h + */ TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); @@ -435,13 +454,6 @@ TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) -/* FIXME: This should probably be fleshed out in the future. */ -#define show_nf_may(val) \ - __print_flags(val, "|", \ - { NFSD_MAY_READ, "READ" }, \ - { NFSD_MAY_WRITE, "WRITE" }, \ - { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }) - DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), @@ -466,7 +478,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nf_may(__entry->nf_may), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file) ) @@ -492,10 +504,10 @@ TRACE_EVENT(nfsd_file_acquire, __field(u32, xid) __field(unsigned int, hash) __field(void *, inode) - __field(unsigned int, may_flags) + __field(unsigned long, may_flags) __field(int, nf_ref) __field(unsigned long, nf_flags) - __field(unsigned char, nf_may) + __field(unsigned long, nf_may) __field(struct file *, nf_file) __field(u32, status) ), @@ -514,10 +526,10 @@ TRACE_EVENT(nfsd_file_acquire, TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", __entry->xid, __entry->hash, __entry->inode, - show_nf_may(__entry->may_flags), __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - show_nf_may(__entry->nf_may), __entry->nf_file, - __entry->status) + show_nfsd_may_flags(__entry->may_flags), + __entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file, __entry->status) ); DECLARE_EVENT_CLASS(nfsd_file_search_class, From 3a90e1dff16afdae6e1c918bfaff24f4d0f84869 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Sep 2020 15:06:26 -0400 Subject: [PATCH 026/131] NFSD: Remove extra "0x" in tracepoint format specifier Clean up: %p adds its own 0x already. Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f1bc55c108b5..89f218d0279c 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -473,7 +473,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p", + TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", __entry->nf_hashval, __entry->nf_inode, __entry->nf_ref, @@ -524,7 +524,7 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", + TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", __entry->xid, __entry->hash, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -545,7 +545,7 @@ DECLARE_EVENT_CLASS(nfsd_file_search_class, __entry->hash = hash; __entry->found = found; ), - TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, + TP_printk("hash=0x%x inode=%p found=%d", __entry->hash, __entry->inode, __entry->found) ); @@ -573,7 +573,7 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->mode = inode->i_mode; __entry->mask = mask; ), - TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, + TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, __entry->nlink, __entry->mode, __entry->mask) ); From f45a444cfe582b85af937a30d35d68d9a84399dd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 27 Aug 2020 16:09:53 -0400 Subject: [PATCH 027/131] NFSD: Add SPDX header for fs/nfsd/trace.c Clean up. The file was contributed in 2014 by Christoph Hellwig in commit 31ef83dc0538 ("nfsd: add trace events"). Signed-off-by: Chuck Lever --- fs/nfsd/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 90967466a1e5..f008b95ceec2 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include "trace.h" From 231307df246eb29f30092836524ebb1fcb8f5b25 Mon Sep 17 00:00:00 2001 From: Huang Guobin Date: Wed, 25 Nov 2020 03:39:33 -0500 Subject: [PATCH 028/131] nfsd: Fix error return code in nfsd_file_cache_init() Fix to return PTR_ERR() error code from the error handling case instead of 0 in function nfsd_file_cache_init(), as done elsewhere in this function. Fixes: 65294c1f2c5e7("nfsd: add a new struct file caching facility to nfsd") Signed-off-by: Huang Guobin Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 3c6c2f7d1688..d77c624c61f6 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -685,6 +685,7 @@ nfsd_file_cache_init(void) if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); + ret = PTR_ERR(nfsd_file_fsnotify_group); nfsd_file_fsnotify_group = NULL; goto out_notifier; } From 0ae4c3e8a64ace1b8d7de033b0751afe43024416 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Nov 2020 15:52:47 -0500 Subject: [PATCH 029/131] SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer() Clean up: De-duplicate some frequently-used code. Signed-off-by: Chuck Lever --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/blocklayout/dev.c | 2 +- fs/nfs/dir.c | 2 +- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/filelayout/filelayoutdev.c | 2 +- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/nfs42xdr.c | 2 +- fs/nfs/nfs4xdr.c | 6 ++-- fs/nfsd/nfs4proc.c | 2 +- include/linux/sunrpc/xdr.h | 44 ++++++++++++++++++++++- net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- net/sunrpc/xdr.c | 28 +++------------ 13 files changed, 59 insertions(+), 39 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 08108b6d2fa1..3be6836074ae 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -697,7 +697,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, xdr_init_decode_pages(&xdr, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&xdr, scratch); status = -EIO; p = xdr_inline_decode(&xdr, 4); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index dec5880ac6de..acb1d22907da 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out; xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&xdr, scratch); p = xdr_inline_decode(&xdr, sizeof(__be32)); if (!p) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4e011adaf967..8a24fe20dccf 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en goto out_nopages; xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); do { if (entry->label) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7f5aa0403e16..d158a500c25c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -666,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, return -ENOMEM; xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), * num_fh (4) */ diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index d913e818858f..86c3f7e69ec4 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err; xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* Get the stripe count (number of stripe index) */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a163533446fa..d7010686d39a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* stripe unit and mirror_array_cnt */ rc = -EIO; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3eda40a320a5..c9b61b818ec1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, INIT_LIST_HEAD(&dsaddrs); xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* multipath count */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6e060a88f98c..cb5d4da2308f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1540,7 +1540,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); + xdr_set_scratch_page(xdr, res->scratch); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c6dbfcae7517..2eabe5add344 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6403,10 +6403,8 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; - if (res->acl_scratch != NULL) { - void *p = page_address(res->acl_scratch); - xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); - } + if (res->acl_scratch != NULL) + xdr_set_scratch_page(xdr, res->acl_scratch); status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e83b21778816..20772f6b0b2d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2276,7 +2276,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; - xdr->scratch.iov_len = 0; + xdr_reset_scratch_buffer(xdr); xdr->page_ptr = buf->pages - 1; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - rqstp->rq_auth_slack; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ec2a22ccdc2a..2729d2d6efce 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -248,7 +248,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); -extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -256,6 +255,49 @@ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned in extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +/** + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +static inline void +xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} + +/** + * xdr_set_scratch_page - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @page: an anonymous page + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) +{ + xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); +} + +/** + * xdr_reset_scratch_buffer - Clear scratch buffer information + * @xdr: pointer to xdr_stream struct + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_reset_scratch_buffer(struct xdr_stream *xdr) +{ + xdr_set_scratch_buffer(xdr, NULL, 0); +} + /** * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 2ff7b7083eba..c636c648849b 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -789,7 +789,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, scratch = alloc_page(GFP_KERNEL); if (!scratch) return -ENOMEM; - xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(xdr, scratch); /* res->status */ err = gssx_dec_status(xdr, &res->status); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 28f81769a27c..c607744b3ea8 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; - xdr_set_scratch_buffer(xdr, NULL, 0); + xdr_reset_scratch_buffer(xdr); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -713,7 +713,7 @@ inline void xdr_commit_encode(struct xdr_stream *xdr) page = page_address(*xdr->page_ptr); memcpy(xdr->scratch.iov_base, page, shift); memmove(page, page + shift, (void *)xdr->p - page); - xdr->scratch.iov_len = 0; + xdr_reset_scratch_buffer(xdr); } EXPORT_SYMBOL_GPL(xdr_commit_encode); @@ -743,8 +743,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, * the "scratch" iov to track any temporarily unused fragment of * space at the end of the previous buffer: */ - xdr->scratch.iov_base = xdr->p; - xdr->scratch.iov_len = frag1bytes; + xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes); p = page_address(*xdr->page_ptr); /* * Note this is where the next encode will start after we've @@ -1052,8 +1051,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst) { xdr->buf = buf; - xdr->scratch.iov_base = NULL; - xdr->scratch.iov_len = 0; + xdr_reset_scratch_buffer(xdr); xdr->nwords = XDR_QUADLEN(buf->len); if (buf->head[0].iov_len != 0) xdr_set_iov(xdr, buf->head, buf->len); @@ -1101,24 +1099,6 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) return p; } -/** - * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. - * @xdr: pointer to xdr_stream struct - * @buf: pointer to an empty buffer - * @buflen: size of 'buf' - * - * The scratch buffer is used when decoding from an array of pages. - * If an xdr_inline_decode() call spans across page boundaries, then - * we copy the data into the scratch buffer in order to allow linear - * access. - */ -void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) -{ - xdr->scratch.iov_base = buf; - xdr->scratch.iov_len = buflen; -} -EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); - static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; From 5191955d6fc65e6d4efe8f4f10a6028298f57281 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 5 Nov 2020 11:19:42 -0500 Subject: [PATCH 030/131] SUNRPC: Prepare for xdr_stream-style decoding on the server-side A "permanent" struct xdr_stream is allocated in struct svc_rqst so that it is usable by all server-side decoders. A per-rqst scratch buffer is also allocated to handle decoding XDR data items that cross page boundaries. To demonstrate how it will be used, add the first call site for the new svcxdr_init_decode() API. As an additional part of the overall conversion, add symbolic constants for successful and failed XDR operations. Returning "0" is overloaded. Sometimes it means something failed, but sometimes it means success. To make it more clear when XDR decoding functions succeed or fail, introduce symbolic constants. Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 2 ++ include/linux/sunrpc/svc.h | 16 ++++++++++++++++ net/sunrpc/svc.c | 5 +++++ 3 files changed, 23 insertions(+) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 27b1ad136150..74d5e58eadb0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1020,6 +1020,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * (necessary in the NFSv4.0 compound case) */ rqstp->rq_cachetype = proc->pc_cachetype; + + svcxdr_init_decode(rqstp); if (!proc->pc_decode(rqstp, argv->iov_base)) goto out_decode_err; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c220b734fa69..34c2a69820e9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -247,6 +247,8 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; + struct xdr_stream rq_arg_stream; + struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ @@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) svc_reserve(rqstp, space + rqstp->rq_auth_slack); } +/** + * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct kvec *argv = rqstp->rq_arg.head; + + xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); +} + #endif /* SUNRPC_SVC_H */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b41500645c3f..4187745887f0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -614,6 +614,10 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) rqstp->rq_server = serv; rqstp->rq_pool = pool; + rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0); + if (!rqstp->rq_scratch_page) + goto out_enomem; + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) goto out_enomem; @@ -842,6 +846,7 @@ void svc_rqst_free(struct svc_rqst *rqstp) { svc_release_buffer(rqstp); + put_page(rqstp->rq_scratch_page); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); From 788f7183fba86b46074c16e7d57ea09302badff4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 5 Nov 2020 14:48:29 -0500 Subject: [PATCH 031/131] NFSD: Add common helpers to decode void args and encode void results Start off the conversion to xdr_stream by de-duplicating the functions that decode void arguments and encode void results. Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 21 ++++----------------- fs/nfsd/nfs3acl.c | 8 ++++---- fs/nfsd/nfs3proc.c | 10 ++++------ fs/nfsd/nfs3xdr.c | 11 ----------- fs/nfsd/nfs4proc.c | 11 ++++------- fs/nfsd/nfs4xdr.c | 12 ------------ fs/nfsd/nfsd.h | 8 ++++++++ fs/nfsd/nfsproc.c | 25 ++++++++++++------------- fs/nfsd/nfssvc.c | 28 ++++++++++++++++++++++++++++ fs/nfsd/nfsxdr.c | 10 ---------- fs/nfsd/xdr.h | 2 -- fs/nfsd/xdr3.h | 2 -- fs/nfsd/xdr4.h | 2 -- 13 files changed, 64 insertions(+), 86 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 6a900f770dd2..b0f66604532a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -185,10 +185,6 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp) /* * XDR decode functions */ -static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) -{ - return 1; -} static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) { @@ -255,15 +251,6 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) * XDR encode functions */ -/* - * There must be an encoding function for void results so svc_process - * will work properly. - */ -static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} - /* GETACL */ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { @@ -378,10 +365,10 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_NULL] = { .pc_func = nfsacld_proc_null, - .pc_decode = nfsaclsvc_decode_voidarg, - .pc_encode = nfsaclsvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd3_voidargs), - .pc_ressize = sizeof(struct nfsd3_voidargs), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, }, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 34a394e50e1d..7c30876a31a1 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -245,10 +245,10 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_acl_procedures3[3] = { [ACLPROC3_NULL] = { .pc_func = nfsd3_proc_null, - .pc_decode = nfs3svc_decode_voidarg, - .pc_encode = nfs3svc_encode_voidres, - .pc_argsize = sizeof(struct nfsd3_voidargs), - .pc_ressize = sizeof(struct nfsd3_voidargs), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, }, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d9be589fed15..76931f4f57c3 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -692,8 +692,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) #define nfsd3_attrstatres nfsd3_attrstat #define nfsd3_wccstatres nfsd3_attrstat #define nfsd3_createres nfsd3_diropres -#define nfsd3_voidres nfsd3_voidargs -struct nfsd3_voidargs { int dummy; }; #define ST 1 /* status*/ #define FH 17 /* filehandle with length */ @@ -704,10 +702,10 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_NULL] = { .pc_func = nfsd3_proc_null, - .pc_decode = nfs3svc_decode_voidarg, - .pc_encode = nfs3svc_encode_voidres, - .pc_argsize = sizeof(struct nfsd3_voidargs), - .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, }, diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 186b07a72373..a6718b952975 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -304,11 +304,6 @@ void fill_post_wcc(struct svc_fh *fhp) /* * XDR decode functions */ -int -nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) -{ - return 1; -} int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) @@ -642,12 +637,6 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) * XDR encode functions */ -int -nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} - /* GETATTR */ int nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 20772f6b0b2d..76300b0441f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -3295,16 +3295,13 @@ static const char *nfsd4_op_name(unsigned opnum) return "unknown_operation"; } -#define nfsd4_voidres nfsd4_voidargs -struct nfsd4_voidargs { int dummy; }; - static const struct svc_procedure nfsd_procedures4[2] = { [NFSPROC4_NULL] = { .pc_func = nfsd4_proc_null, - .pc_decode = nfs4svc_decode_voidarg, - .pc_encode = nfs4svc_encode_voidres, - .pc_argsize = sizeof(struct nfsd4_voidargs), - .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 1, }, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e3c6bea83bd6..86a149ce0e84 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5271,12 +5271,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } -int -nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} - void nfsd4_release_compoundargs(struct svc_rqst *rqstp) { struct nfsd4_compoundargs *args = rqstp->rq_argp; @@ -5294,12 +5288,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -int -nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) -{ - return 1; -} - int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index cb742e17e04a..7907de3f2ee6 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -73,6 +73,14 @@ extern unsigned long nfsd_drc_mem_used; extern const struct seq_operations nfs_exports_op; +/* + * Common void argument and result helpers + */ +struct nfsd_voidargs { }; +struct nfsd_voidres { }; +int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p); +int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); + /* * Function prototypes. */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0d71549f9d42..9473d048efec 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -609,7 +609,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. */ -struct nfsd_void { int dummy; }; #define ST 1 /* status */ #define FH 8 /* filehandle */ @@ -618,10 +617,10 @@ struct nfsd_void { int dummy; }; static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_NULL] = { .pc_func = nfsd_proc_null, - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, }, @@ -647,10 +646,10 @@ static const struct svc_procedure nfsd_procedures2[18] = { }, [NFSPROC_ROOT] = { .pc_func = nfsd_proc_root, - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, }, @@ -685,10 +684,10 @@ static const struct svc_procedure nfsd_procedures2[18] = { }, [NFSPROC_WRITECACHE] = { .pc_func = nfsd_proc_writecache, - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, }, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 74d5e58eadb0..24fb947a3c12 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1075,6 +1075,34 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) return 1; } +/** + * nfssvc_decode_voidarg - Decode void arguments + * @rqstp: Server RPC transaction context + * @p: buffer containing arguments to decode + * + * Return values: + * %0: Arguments were not valid + * %1: Decoding was successful + */ +int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +{ + return 1; +} + +/** + * nfssvc_encode_voidres - Encode void results + * @rqstp: Server RPC transaction context + * @p: buffer in which to encode results + * + * Return values: + * %0: Local error while encoding + * %1: Encoding was successful + */ +int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); +} + int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 9e00a902113e..7aa6e8aca2c1 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -192,11 +192,6 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f /* * XDR decode functions */ -int -nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_argsize_check(rqstp, p); -} int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) @@ -423,11 +418,6 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) /* * XDR encode functions */ -int -nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} int nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p) diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 0ff336b0b25f..ad77387734cc 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -144,7 +144,6 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_void(struct svc_rqst *, __be32 *); int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *); int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); @@ -156,7 +155,6 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_void(struct svc_rqst *, __be32 *); int nfssvc_encode_stat(struct svc_rqst *, __be32 *); int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index ae6fa6c9cb46..456fcd7a1038 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -273,7 +273,6 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *); int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *); int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); @@ -290,7 +289,6 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *); int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 679d40af1bbb..37f89ad5e992 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -781,8 +781,6 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *); -int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *); int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); From 0dfdad1c1d1b77b9b085f4da390464dd0ac5647a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 Oct 2020 13:00:29 -0400 Subject: [PATCH 032/131] NFSD: Add tracepoints in nfsd_dispatch() For troubleshooting purposes, record GARBAGE_ARGS and CANT_ENCODE failures. Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 17 ++++---------- fs/nfsd/trace.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 24fb947a3c12..1575e567bbfa 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -29,6 +29,8 @@ #include "netns.h" #include "filecache.h" +#include "trace.h" + #define NFSDDBG_FACILITY NFSDDBG_SVC bool inter_copy_offload_enable; @@ -1009,11 +1011,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) struct kvec *resv = &rqstp->rq_res.head[0]; __be32 *p; - dprintk("nfsd_dispatch: vers %d proc %d\n", - rqstp->rq_vers, rqstp->rq_proc); - if (nfs_request_too_big(rqstp, proc)) - goto out_too_large; + goto out_decode_err; /* * Give the xdr decoder a chance to change this if it wants @@ -1052,24 +1051,18 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) out_cached_reply: return 1; -out_too_large: - dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); - *statp = rpc_garbage_args; - return 1; - out_decode_err: - dprintk("nfsd: failed to decode arguments!\n"); + trace_nfsd_garbage_args_err(rqstp); *statp = rpc_garbage_args; return 1; out_update_drop: - dprintk("nfsd: Dropping request; may be revisited later\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); out_dropit: return 0; out_encode_err: - dprintk("nfsd: failed to encode result!\n"); + trace_nfsd_cant_encode_err(rqstp); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); *statp = rpc_system_err; return 1; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 89f218d0279c..0bf1707bd846 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -12,6 +12,66 @@ #include "export.h" #include "nfsfh.h" +#define NFSD_TRACE_PROC_ARG_FIELDS \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ + __array(unsigned char, client, sizeof(struct sockaddr_in6)) + +#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \ + do { \ + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ + __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ + rqstp->rq_xprt->xpt_locallen); \ + memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ + rqstp->rq_xprt->xpt_remotelen); \ + } while (0); + +TRACE_EVENT(nfsd_garbage_args_err, + TP_PROTO( + const struct svc_rqst *rqstp + ), + TP_ARGS(rqstp), + TP_STRUCT__entry( + NFSD_TRACE_PROC_ARG_FIELDS + + __field(u32, vers) + __field(u32, proc) + ), + TP_fast_assign( + NFSD_TRACE_PROC_ARG_ASSIGNMENTS + + __entry->vers = rqstp->rq_vers; + __entry->proc = rqstp->rq_proc; + ), + TP_printk("xid=0x%08x vers=%u proc=%u", + __entry->xid, __entry->vers, __entry->proc + ) +); + +TRACE_EVENT(nfsd_cant_encode_err, + TP_PROTO( + const struct svc_rqst *rqstp + ), + TP_ARGS(rqstp), + TP_STRUCT__entry( + NFSD_TRACE_PROC_ARG_FIELDS + + __field(u32, vers) + __field(u32, proc) + ), + TP_fast_assign( + NFSD_TRACE_PROC_ARG_ASSIGNMENTS + + __entry->vers = rqstp->rq_vers; + __entry->proc = rqstp->rq_proc; + ), + TP_printk("xid=0x%08x vers=%u proc=%u", + __entry->xid, __entry->vers, __entry->proc + ) +); + #define show_nfsd_may_flags(x) \ __print_flags(x, "|", \ { NFSD_MAY_EXEC, "EXEC" }, \ From 08281341be8ebc97ee47999812bcf411942baa1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Nov 2020 11:36:42 -0500 Subject: [PATCH 033/131] NFSD: Add tracepoints in nfsd4_decode/encode_compound() For troubleshooting purposes, record failures to decode NFSv4 operation arguments and encode operation results. trace_nfsd_compound_decode_err() replaces the dprintk() call sites that are embedded in READ_* macros that are about to be removed. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 13 +++++++-- fs/nfsd/trace.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 86a149ce0e84..66edac748272 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -54,6 +54,8 @@ #include "pnfs.h" #include "filecache.h" +#include "trace.h" + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include #endif @@ -2248,9 +2250,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) READ_BUF(4); op->opnum = be32_to_cpup(p++); - if (nfsd4_opnum_in_range(argp, op)) + if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); - else { + if (op->status != nfs_ok) + trace_nfsd_compound_decode_err(argp->rqstp, + argp->opcnt, i, + op->opnum, + op->status); + } else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } @@ -5203,6 +5210,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + if (op->status) + trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); xdr_commit_encode(xdr); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 0bf1707bd846..92a0973dd671 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -28,6 +28,24 @@ rqstp->rq_xprt->xpt_remotelen); \ } while (0); +#define NFSD_TRACE_PROC_RES_FIELDS \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __field(unsigned long, status) \ + __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ + __array(unsigned char, client, sizeof(struct sockaddr_in6)) + +#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ + do { \ + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ + __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + __entry->status = be32_to_cpu(error); \ + memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ + rqstp->rq_xprt->xpt_locallen); \ + memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ + rqstp->rq_xprt->xpt_remotelen); \ + } while (0); + TRACE_EVENT(nfsd_garbage_args_err, TP_PROTO( const struct svc_rqst *rqstp @@ -127,6 +145,56 @@ TRACE_EVENT(nfsd_compound_status, __get_str(name), __entry->status) ) +TRACE_EVENT(nfsd_compound_decode_err, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 args_opcnt, + u32 resp_opcnt, + u32 opnum, + __be32 status + ), + TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), + TP_STRUCT__entry( + NFSD_TRACE_PROC_RES_FIELDS + + __field(u32, args_opcnt) + __field(u32, resp_opcnt) + __field(u32, opnum) + ), + TP_fast_assign( + NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + + __entry->args_opcnt = args_opcnt; + __entry->resp_opcnt = resp_opcnt; + __entry->opnum = opnum; + ), + TP_printk("op=%u/%u opnum=%u status=%lu", + __entry->resp_opcnt, __entry->args_opcnt, + __entry->opnum, __entry->status) +); + +TRACE_EVENT(nfsd_compound_encode_err, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 opnum, + __be32 status + ), + TP_ARGS(rqstp, opnum, status), + TP_STRUCT__entry( + NFSD_TRACE_PROC_RES_FIELDS + + __field(u32, opnum) + ), + TP_fast_assign( + NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + + __entry->opnum = opnum; + ), + TP_printk("opnum=%u status=%lu", + __entry->opnum, __entry->status) +); + + DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, From c1346a1216ab5cb04a265380ac9035d91b16b6d5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 11:54:23 -0500 Subject: [PATCH 034/131] NFSD: Replace the internals of the READ_BUF() macro Convert the READ_BUF macro in nfs4xdr.c from open code to instead use the new xdr_stream-style decoders already in use by the encode side (and by the in-kernel NFS client implementation). Once this conversion is done, each individual NFSv4 argument decoder can be independently cleaned up to replace these macros with C code. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 4 +- fs/nfsd/nfs4xdr.c | 181 ++++++------------------------------- fs/nfsd/xdr4.h | 10 +- include/linux/sunrpc/xdr.h | 2 + net/sunrpc/xdr.c | 45 +++++++++ 5 files changed, 77 insertions(+), 165 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 76300b0441f0..df2d6f70c8d4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1023,8 +1023,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, - &write->wr_head, write->wr_buflen); + nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, + write->wr_payload.head, write->wr_buflen); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 66edac748272..e3a2912c5e56 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -131,90 +131,13 @@ xdr_error: \ memcpy((x), p, nbytes); \ p += XDR_QUADLEN(nbytes); \ } while (0) - -/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ -#define READ_BUF(nbytes) do { \ - if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ - p = argp->p; \ - argp->p += XDR_QUADLEN(nbytes); \ - } else if (!(p = read_buf(argp, nbytes))) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - goto xdr_error; \ - } \ -} while (0) - -static void next_decode_page(struct nfsd4_compoundargs *argp) -{ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + XDR_QUADLEN(argp->pagelen); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } -} - -static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) -{ - /* We want more bytes than seem to be available. - * Maybe we need a new page, maybe we have just run out - */ - unsigned int avail = (char *)argp->end - (char *)argp->p; - __be32 *p; - - if (argp->pagelen == 0) { - struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; - - if (!argp->tail) { - argp->tail = true; - avail = vec->iov_len; - argp->p = vec->iov_base; - argp->end = vec->iov_base + avail; - } - - if (avail < nbytes) - return NULL; - - p = argp->p; - argp->p += XDR_QUADLEN(nbytes); - return p; - } - - if (avail + argp->pagelen < nbytes) - return NULL; - if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ - return NULL; - /* ok, we can do it with the current plus the next page */ - if (nbytes <= sizeof(argp->tmp)) - p = argp->tmp; - else { - kfree(argp->tmpp); - p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); - if (!p) - return NULL; - - } - /* - * The following memcpy is safe because read_buf is always - * called with nbytes > avail, and the two cases above both - * guarantee p points to at least nbytes bytes. - */ - memcpy(p, argp->p, avail); - next_decode_page(argp); - memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); - argp->p += XDR_QUADLEN(nbytes - avail); - return p; -} - -static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp) -{ - unsigned int this = (char *)argp->end - (char *)argp->p; - - return this + argp->pagelen; -} +#define READ_BUF(nbytes) \ + do { \ + p = xdr_inline_decode(argp->xdr,\ + nbytes); \ + if (!p) \ + goto xdr_error; \ + } while (0) static int zero_clientid(clientid_t *clid) { @@ -261,44 +184,6 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } -static __be32 -svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head, - struct page ***pagelist, u32 buflen) -{ - int avail; - int len; - int pages; - - /* Sorry .. no magic macros for this.. * - * READ_BUF(write->wr_buflen); - * SAVEMEM(write->wr_buf, write->wr_buflen); - */ - avail = (char *)argp->end - (char *)argp->p; - if (avail + argp->pagelen < buflen) { - dprintk("NFSD: xdr error (%s:%d)\n", - __FILE__, __LINE__); - return nfserr_bad_xdr; - } - head->iov_base = argp->p; - head->iov_len = avail; - *pagelist = argp->pagelist; - - len = XDR_QUADLEN(buflen) << 2; - if (len >= avail) { - len -= avail; - - pages = len >> PAGE_SHIFT; - argp->pagelist += pages; - argp->pagelen -= pages * PAGE_SIZE; - len -= pages * PAGE_SIZE; - - next_decode_page(argp); - } - argp->p += XDR_QUADLEN(len); - - return 0; -} - /** * savemem - duplicate a chunk of memory for later processing * @argp: NFSv4 compound argument structure to be freed with @@ -398,7 +283,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(4); len += 4; nace = be32_to_cpup(p++); - if (nace > compoundargs_bytes_left(argp)/20) + if (nace > xdr_stream_remaining(argp->xdr) / sizeof(struct nfs4_ace)) /* * Even with 4-byte names there wouldn't be * space for that many aces; something fishy is @@ -929,7 +814,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) { - __be32 *p; + DECODE_HEAD; READ_BUF(4); o->len = be32_to_cpup(p++); @@ -939,9 +824,8 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne READ_BUF(o->len); SAVEMEM(o->data, o->len); - return nfs_ok; -xdr_error: - return nfserr_bad_xdr; + + DECODE_TAIL; } static __be32 @@ -1319,10 +1203,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) goto xdr_error; write->wr_buflen = be32_to_cpup(p++); - status = svcxdr_construct_vector(argp, &write->wr_head, - &write->wr_pagelist, write->wr_buflen); - if (status) - return status; + if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) + goto xdr_error; DECODE_TAIL; } @@ -1891,13 +1773,14 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) */ /* - * Decode data into buffer. Uses head and pages constructed by - * svcxdr_construct_vector. + * Decode data into buffer. */ static __be32 -nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head, - struct page **pages, char **bufp, u32 buflen) +nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, + char **bufp, u32 buflen) { + struct page **pages = xdr->pages; + struct kvec *head = xdr->head; char *tmp, *dp; u32 len; @@ -2012,8 +1895,6 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, { DECODE_HEAD; u32 flags, maxcount, size; - struct kvec head; - struct page **pagelist; READ_BUF(4); flags = be32_to_cpup(p++); @@ -2036,12 +1917,12 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, setxattr->setxa_len = size; if (size > 0) { - status = svcxdr_construct_vector(argp, &head, &pagelist, size); - if (status) - return status; + struct xdr_buf payload; - status = nfsd4_vbuf_from_vector(argp, &head, pagelist, - &setxattr->setxa_buf, size); + if (!xdr_stream_subsegment(argp->xdr, &payload, size)) + goto xdr_error; + status = nfsd4_vbuf_from_vector(argp, &payload, + &setxattr->setxa_buf, size); } DECODE_TAIL; @@ -5288,8 +5169,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) kfree(args->ops); args->ops = args->iops; } - kfree(args->tmpp); - args->tmpp = NULL; while (args->to_free) { struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; @@ -5302,19 +5181,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd4_compoundargs *args = rqstp->rq_argp; - if (rqstp->rq_arg.head[0].iov_len % 4) { - /* client is nuts */ - dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", - __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); - return 0; - } - args->p = p; - args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; - args->pagelist = rqstp->rq_arg.pages; - args->pagelen = rqstp->rq_arg.page_len; - args->tail = false; + /* svcxdr_tmp_alloc */ args->tmpp = NULL; args->to_free = NULL; + + args->xdr = &rqstp->rq_arg_stream; args->ops = args->iops; args->rqstp = rqstp; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 37f89ad5e992..0eb13bd603ea 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -419,8 +419,7 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - struct kvec wr_head; - struct page ** wr_pagelist; /* request */ + struct xdr_buf wr_payload; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -696,15 +695,10 @@ struct svcxdr_tmpbuf { struct nfsd4_compoundargs { /* scratch variables for XDR decode */ - __be32 * p; - __be32 * end; - struct page ** pagelist; - int pagelen; - bool tail; __be32 tmp[8]; __be32 * tmpp; + struct xdr_stream *xdr; struct svcxdr_tmpbuf *to_free; - struct svc_rqst *rqstp; u32 taglen; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2729d2d6efce..cc64cd0891f1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -254,6 +254,8 @@ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int len); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index c607744b3ea8..757560a3b06b 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1407,6 +1407,51 @@ int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_stream_subsegment - set @subbuf to a portion of @xdr + * @xdr: an xdr_stream set up for decoding + * @subbuf: the result buffer + * @nbytes: length of @xdr to extract, in bytes + * + * Sets up @subbuf to represent a portion of @xdr. The portion + * starts at the current offset in @xdr, and extends for a length + * of @nbytes. If this is successful, @xdr is advanced to the next + * position following that portion. + * + * Return values: + * %true: @subbuf has been initialized, and @xdr has been advanced. + * %false: a bounds error has occurred + */ +bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int nbytes) +{ + unsigned int remaining, offset, len; + + if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes)) + return false; + + if (subbuf->head[0].iov_len) + if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len)) + return false; + + remaining = subbuf->page_len; + offset = subbuf->page_base; + while (remaining) { + len = min_t(unsigned int, remaining, PAGE_SIZE) - offset; + + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) + return false; + if (!__xdr_inline_decode(xdr, len)) + return false; + + remaining -= len; + offset = 0; + } + + return true; +} +EXPORT_SYMBOL_GPL(xdr_stream_subsegment); + /** * xdr_buf_trim - lop at most "len" bytes off the end of "buf" * @buf: buf to be trimmed From d169a6a9e5fd7f9e4b74e5e5d2e5a4fd0f84ef05 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:12:27 -0500 Subject: [PATCH 035/131] NFSD: Replace READ* macros in nfsd4_decode_access() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e3a2912c5e56..b0d3a212ea76 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -439,17 +439,6 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_TAIL; } -static __be32 -nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) -{ - DECODE_HEAD; - - READ_BUF(4); - access->ac_req_access = be32_to_cpup(p++); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { DECODE_HEAD; @@ -531,6 +520,19 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ DECODE_TAIL; } +/* + * NFSv4 operation argument decoders + */ + +static __be32 +nfsd4_decode_access(struct nfsd4_compoundargs *argp, + struct nfsd4_access *access) +{ + if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) + return nfserr_bad_xdr; + return nfs_ok; +} + static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) { DECODE_HEAD; From d3d2f38154571e70d5806b5c5264bf61c101ea15 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:18:23 -0500 Subject: [PATCH 036/131] NFSD: Replace READ* macros in nfsd4_decode_close() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b0d3a212ea76..1e5188b2f943 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -439,6 +439,19 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_TAIL; } +static __be32 +nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE); + if (!p) + return nfserr_bad_xdr; + sid->si_generation = be32_to_cpup(p++); + memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque)); + return nfs_ok; +} + static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { DECODE_HEAD; @@ -559,13 +572,9 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { - DECODE_HEAD; - - READ_BUF(4); - close->cl_seqid = be32_to_cpup(p++); - return nfsd4_decode_stateid(argp, &close->cl_stateid); - - DECODE_TAIL; + if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_stateid4(argp, &close->cl_stateid); } From cbd9abb3706e96563b36af67595707a7054ab693 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:19:51 -0500 Subject: [PATCH 037/131] NFSD: Replace READ* macros in nfsd4_decode_commit() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 12 +++++------- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1e5188b2f943..ff1e3621d977 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -581,13 +581,11 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) { - DECODE_HEAD; - - READ_BUF(12); - p = xdr_decode_hyper(p, &commit->co_offset); - commit->co_count = be32_to_cpup(p++); - - DECODE_TAIL; + if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) + return nfserr_bad_xdr; + return nfs_ok; } static __be32 diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc64cd0891f1..cc669d95c484 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -571,6 +571,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } +/** + * xdr_stream_decode_u64 - Decode a 64-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store 64-bit integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_hyper(p, ptr); + return 0; +} + /** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream From 081d53fe0b43c47c36d1832b759bf14edde9cdbb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:09:13 -0500 Subject: [PATCH 038/131] NFSD: Change the way the expected length of a fattr4 is checked Because the fattr4 is now managed in an xdr_stream, all that is needed is to store the initial position of the stream before decoding the attribute list. Then the actual length of the list is computed using the final stream position, after decoding is complete. No behavior change is expected. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ff1e3621d977..cd114e9cbbc7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -250,7 +250,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, struct xdr_netobj *label, int *umask) { - int expected_len, len = 0; + unsigned int starting_pos; + u32 attrlist4_count; u32 dummy32; char *buf; @@ -267,12 +268,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_attrnotsupp; } - READ_BUF(4); - expected_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0) + return nfserr_bad_xdr; + starting_pos = xdr_stream_pos(argp->xdr); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); - len += 8; p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } @@ -280,7 +281,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 nace; struct nfs4_ace *ace; - READ_BUF(4); len += 4; + READ_BUF(4); nace = be32_to_cpup(p++); if (nace > xdr_stream_remaining(argp->xdr) / sizeof(struct nfs4_ace)) @@ -297,13 +298,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { - READ_BUF(16); len += 16; + READ_BUF(16); ace->type = be32_to_cpup(p++); ace->flag = be32_to_cpup(p++); ace->access_mask = be32_to_cpup(p++); dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); - len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); ace->whotype = nfs4_acl_get_whotype(buf, dummy32); status = nfs_ok; @@ -322,17 +322,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); - len += 4; iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); - len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) return status; @@ -340,10 +337,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); - len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) return status; @@ -351,11 +346,9 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - len += 12; status = nfsd4_decode_time(argp, &iattr->ia_atime); if (status) return status; @@ -370,11 +363,9 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - len += 12; status = nfsd4_decode_time(argp, &iattr->ia_mtime); if (status) return status; @@ -392,18 +383,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ READ_BUF(4); - len += 4; dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); if (dummy32 > NFS4_MAXLABELLEN) return nfserr_badlabel; - len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); label->len = dummy32; label->data = svcxdr_dupstr(argp, buf, dummy32); @@ -414,15 +401,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (!umask) goto xdr_error; READ_BUF(8); - len += 8; dummy32 = be32_to_cpup(p++); iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO); dummy32 = be32_to_cpup(p++); *umask = dummy32 & S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } - if (len != expected_len) - goto xdr_error; + + /* request sanity: did attrlist4 contain the expected number of words? */ + if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) + return nfserr_bad_xdr; DECODE_TAIL; } From 2ac1b9b2afbbacf597dbec722b23b6be62e4e41e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:47:16 -0500 Subject: [PATCH 039/131] NFSD: Replace READ* macros that decode the fattr4 size attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cd114e9cbbc7..7d7efb9978da 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -273,8 +273,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, starting_pos = xdr_stream_pos(argp->xdr); if (bmval[0] & FATTR4_WORD0_SIZE) { - READ_BUF(8); - p = xdr_decode_hyper(p, &iattr->ia_size); + u64 size; + + if (xdr_stream_decode_u64(argp->xdr, &size) < 0) + return nfserr_bad_xdr; + iattr->ia_size = size; iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { From c941a96823cf52e742606b486b81ab346bf111c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:02:54 -0500 Subject: [PATCH 040/131] NFSD: Replace READ* macros that decode the fattr4 acl attribute Refactor for clarity and to move infrequently-used code out of line. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 107 +++++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7d7efb9978da..ac2f25949716 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -245,6 +245,70 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } +static __be32 +nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace) +{ + __be32 *p, status; + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0) + return nfserr_bad_xdr; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + ace->whotype = nfs4_acl_get_whotype((char *)p, length); + if (ace->whotype != NFS4_ACL_WHO_NAMED) + status = nfs_ok; + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + (char *)p, length, &ace->who_gid); + else + status = nfsd_map_name_to_uid(argp->rqstp, + (char *)p, length, &ace->who_uid); + + return status; +} + +/* A counted array of nfsace4's */ +static noinline __be32 +nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl) +{ + struct nfs4_ace *ace; + __be32 status; + u32 count; + + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + + if (count > xdr_stream_remaining(argp->xdr) / 20) + /* + * Even with 4-byte names there wouldn't be + * space for that many aces; something fishy is + * going on: + */ + return nfserr_fbig; + + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count)); + if (*acl == NULL) + return nfserr_jukebox; + + (*acl)->naces = count; + for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) { + status = nfsd4_decode_nfsace4(argp, ace); + if (status) + return status; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, @@ -281,46 +345,9 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - u32 nace; - struct nfs4_ace *ace; - - READ_BUF(4); - nace = be32_to_cpup(p++); - - if (nace > xdr_stream_remaining(argp->xdr) / sizeof(struct nfs4_ace)) - /* - * Even with 4-byte names there wouldn't be - * space for that many aces; something fishy is - * going on: - */ - return nfserr_fbig; - - *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); - if (*acl == NULL) - return nfserr_jukebox; - - (*acl)->naces = nace; - for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { - READ_BUF(16); - ace->type = be32_to_cpup(p++); - ace->flag = be32_to_cpup(p++); - ace->access_mask = be32_to_cpup(p++); - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - READMEM(buf, dummy32); - ace->whotype = nfs4_acl_get_whotype(buf, dummy32); - status = nfs_ok; - if (ace->whotype != NFS4_ACL_WHO_NAMED) - ; - else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - status = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace->who_gid); - else - status = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace->who_uid); - if (status) - return status; - } + status = nfsd4_decode_acl(argp, acl); + if (status) + return status; } else *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { From 1c8f0ad7dd35fd12307904036c7c839f77b6e3f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:54:26 -0500 Subject: [PATCH 041/131] NFSD: Replace READ* macros that decode the fattr4 mode attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ac2f25949716..e6c3b6766997 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -351,8 +351,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } else *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { - READ_BUF(4); - iattr->ia_mode = be32_to_cpup(p++); + u32 mode; + + if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) + return nfserr_bad_xdr; + iattr->ia_mode = mode; iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } From 9853a5ac9be381917e9be0b4133cd4ac5a7ad875 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:56:42 -0500 Subject: [PATCH 042/131] NFSD: Replace READ* macros that decode the fattr4 owner attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e6c3b6766997..903b4db90aaf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -360,11 +360,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { - READ_BUF(4); - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - READMEM(buf, dummy32); - if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length, + &iattr->ia_uid); + if (status) return status; iattr->ia_valid |= ATTR_UID; } From 393c31dd27f83adb06b07a1b5f0a5b8966a0f01e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 13:58:18 -0500 Subject: [PATCH 043/131] NFSD: Replace READ* macros that decode the fattr4 owner_group attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 903b4db90aaf..433fa670b696 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -374,11 +374,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_UID; } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { - READ_BUF(4); - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - READMEM(buf, dummy32); - if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length, + &iattr->ia_gid); + if (status) return status; iattr->ia_valid |= ATTR_GID; } From 1c3eff7ea4a98c642134ee493001ae13b79ff38c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 14:01:08 -0500 Subject: [PATCH 044/131] NFSD: Replace READ* macros that decode the fattr4 time_set attributes Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 433fa670b696..9b133868468b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -219,6 +219,21 @@ nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) DECODE_TAIL; } +static __be32 +nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3); + if (!p) + return nfserr_bad_xdr; + p = xdr_decode_hyper(p, &tv->tv_sec); + tv->tv_nsec = be32_to_cpup(p++); + if (tv->tv_nsec >= (u32)1000000000) + return nfserr_inval; + return nfs_ok; +} + static __be32 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { @@ -388,11 +403,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_GID; } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { - READ_BUF(4); - dummy32 = be32_to_cpup(p++); - switch (dummy32) { + u32 set_it; + + if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) + return nfserr_bad_xdr; + switch (set_it) { case NFS4_SET_TO_CLIENT_TIME: - status = nfsd4_decode_time(argp, &iattr->ia_atime); + status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -401,15 +418,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_ATIME; break; default: - goto xdr_error; + return nfserr_bad_xdr; } } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { - READ_BUF(4); - dummy32 = be32_to_cpup(p++); - switch (dummy32) { + u32 set_it; + + if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) + return nfserr_bad_xdr; + switch (set_it) { case NFS4_SET_TO_CLIENT_TIME: - status = nfsd4_decode_time(argp, &iattr->ia_mtime); + status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -418,7 +437,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_MTIME; break; default: - goto xdr_error; + return nfserr_bad_xdr; } } From dabe91828f92cd493e9e75efbc10f9878d2a73fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 14:05:51 -0500 Subject: [PATCH 045/131] NFSD: Replace READ* macros that decode the fattr4 security label attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9b133868468b..4e87d035632a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -324,6 +324,33 @@ nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl) return nfs_ok; } +static noinline __be32 +nfsd4_decode_security_label(struct nfsd4_compoundargs *argp, + struct xdr_netobj *label) +{ + u32 lfs, pi, length; + __be32 *p; + + if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &pi) < 0) + return nfserr_bad_xdr; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + if (length > NFS4_MAXLABELLEN) + return nfserr_badlabel; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + label->len = length; + label->data = svcxdr_dupstr(argp, p, length); + if (!label->data) + return nfserr_jukebox; + + return nfs_ok; +} + static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, @@ -332,7 +359,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, unsigned int starting_pos; u32 attrlist4_count; u32 dummy32; - char *buf; DECODE_HEAD; iattr->ia_valid = 0; @@ -440,24 +466,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_bad_xdr; } } - label->len = 0; if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { - READ_BUF(4); - dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ - READ_BUF(4); - dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ - READ_BUF(4); - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - if (dummy32 > NFS4_MAXLABELLEN) - return nfserr_badlabel; - READMEM(buf, dummy32); - label->len = dummy32; - label->data = svcxdr_dupstr(argp, buf, dummy32); - if (!label->data) - return nfserr_jukebox; + status = nfsd4_decode_security_label(argp, label); + if (status) + return status; } if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { if (!umask) From 66f0476c704c86d44aa9da19d4753df66f2dbc96 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 14:07:43 -0500 Subject: [PATCH 046/131] NFSD: Replace READ* macros that decode the fattr4 umask attribute Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4e87d035632a..934cbae85ea7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -358,7 +358,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, { unsigned int starting_pos; u32 attrlist4_count; - u32 dummy32; DECODE_HEAD; iattr->ia_valid = 0; @@ -474,13 +473,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return status; } if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { + u32 mode, mask; + if (!umask) - goto xdr_error; - READ_BUF(8); - dummy32 = be32_to_cpup(p++); - iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO); - dummy32 = be32_to_cpup(p++); - *umask = dummy32 & S_IRWXUGO; + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) + return nfserr_bad_xdr; + iattr->ia_mode = mode & (S_IFMT | S_IALLUGO); + if (xdr_stream_decode_u32(argp->xdr, &mask) < 0) + return nfserr_bad_xdr; + *umask = mask & S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } From d1c263a031e876ac3ca5223c728e4d98ed50b3c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 12:56:05 -0500 Subject: [PATCH 047/131] NFSD: Replace READ* macros in nfsd4_decode_fattr() Let's be more careful to avoid overrunning the memory that backs the bitmap array. This requires updating the synopsis of nfsd4_decode_fattr(). Bruce points out that a server needs to be careful to return nfs_ok when a client presents bitmap bits the server doesn't support. This includes bits in bitmap words the server might not yet support. The current READ* based implementation is good about that, but that requirement hasn't been documented. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 82 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 934cbae85ea7..180769337d54 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -260,6 +260,46 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } +/** + * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4 + * @argp: NFSv4 compound argument structure + * @bmval: pointer to an array of u32's to decode into + * @bmlen: size of the @bmval array + * + * The server needs to return nfs_ok rather than nfserr_bad_xdr when + * encountering bitmaps containing bits it does not recognize. This + * includes bits in bitmap words past WORDn, where WORDn is the last + * bitmap WORD the implementation currently supports. Thus we are + * careful here to simply ignore bits in bitmap words that this + * implementation has yet to support explicitly. + * + * Return values: + * %nfs_ok: @bmval populated successfully + * %nfserr_bad_xdr: the encoded bitmap was invalid + */ +static __be32 +nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) +{ + u32 i, count; + __be32 *p; + + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + /* request sanity */ + if (count > 1000) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, count << 2); + if (!p) + return nfserr_bad_xdr; + i = 0; + while (i < count) + bmval[i++] = be32_to_cpup(p++); + while (i < bmlen) + bmval[i++] = 0; + + return nfs_ok; +} + static __be32 nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace) { @@ -352,17 +392,18 @@ nfsd4_decode_security_label(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl, - struct xdr_netobj *label, int *umask) +nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label, int *umask) { unsigned int starting_pos; u32 attrlist4_count; + __be32 *p, status; - DECODE_HEAD; iattr->ia_valid = 0; - if ((status = nfsd4_decode_bitmap(argp, bmval))) - return status; + status = nfsd4_decode_bitmap4(argp, bmval, bmlen); + if (status) + return nfserr_bad_xdr; if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 @@ -490,7 +531,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 @@ -690,9 +731,10 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; - status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl, &create->cr_label, - &create->cr_umask); + status = nfsd4_decode_fattr4(argp, create->cr_bmval, + ARRAY_SIZE(create->cr_bmval), + &create->cr_iattr, &create->cr_acl, + &create->cr_label, &create->cr_umask); if (status) goto out; @@ -941,9 +983,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: - status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label, - &open->op_umask); + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); if (status) goto out; break; @@ -956,9 +999,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) goto xdr_error; READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); - status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label, - &open->op_umask); + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); if (status) goto out; break; @@ -1194,8 +1238,10 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); if (status) return status; - return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl, &setattr->sa_label, NULL); + return nfsd4_decode_fattr4(argp, setattr->sa_bmval, + ARRAY_SIZE(setattr->sa_bmval), + &setattr->sa_iattr, &setattr->sa_acl, + &setattr->sa_label, NULL); } static __be32 From 000dfa18b3df9c62df5f768f9187cf1a94ded71d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:24:10 -0500 Subject: [PATCH 048/131] NFSD: Replace READ* macros in nfsd4_decode_create() A dedicated decoder for component4 is introduced here, which will be used by other operation decoders in subsequent patches. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 58 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 180769337d54..85283b55834e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -92,6 +92,8 @@ check_filename(char *str, int len) if (len == 0) return nfserr_inval; + if (len > NFS4_MAXNAMLEN) + return nfserr_nametoolong; if (isdotent(str, len)) return nfserr_badname; for (i = 0; i < len; i++) @@ -205,6 +207,27 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } +static __be32 +nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) +{ + __be32 *p, status; + + if (xdr_stream_decode_u32(argp->xdr, lenp) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, *lenp); + if (!p) + return nfserr_bad_xdr; + status = check_filename((char *)p, *lenp); + if (status) + return status; + *namp = svcxdr_tmpalloc(argp, *lenp); + if (!*namp) + return nfserr_jukebox; + memcpy(*namp, p, *lenp); + + return nfs_ok; +} + static __be32 nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { @@ -698,24 +721,27 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit static __be32 nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) { - DECODE_HEAD; + __be32 *p, status; - READ_BUF(4); - create->cr_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0) + return nfserr_bad_xdr; switch (create->cr_type) { case NF4LNK: - READ_BUF(4); - create->cr_datalen = be32_to_cpup(p++); - READ_BUF(create->cr_datalen); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, create->cr_datalen); + if (!p) + return nfserr_bad_xdr; create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); if (!create->cr_data) return nfserr_jukebox; break; case NF4BLK: case NF4CHR: - READ_BUF(8); - create->cr_specdata1 = be32_to_cpup(p++); - create->cr_specdata2 = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0) + return nfserr_bad_xdr; break; case NF4SOCK: case NF4FIFO: @@ -723,22 +749,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create default: break; } - - READ_BUF(4); - create->cr_namelen = be32_to_cpup(p++); - READ_BUF(create->cr_namelen); - SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen))) + status = nfsd4_decode_component4(argp, &create->cr_name, + &create->cr_namelen); + if (status) return status; - status = nfsd4_decode_fattr4(argp, create->cr_bmval, ARRAY_SIZE(create->cr_bmval), &create->cr_iattr, &create->cr_acl, &create->cr_label, &create->cr_umask); if (status) - goto out; + return status; - DECODE_TAIL; + return nfs_ok; } static inline __be32 From 95e6482cedfc0785b85db49b72a05323bbf41750 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Nov 2020 14:11:58 -0500 Subject: [PATCH 049/131] NFSD: Replace READ* macros in nfsd4_decode_delegreturn() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 85283b55834e..ff4fdec6b8f3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -766,7 +766,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create static inline __be32 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { - return nfsd4_decode_stateid(argp, &dr->dr_stateid); + return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 From f759eff260f1f0b0f56531517762f27ee3233506 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 19 Nov 2020 14:40:20 -0500 Subject: [PATCH 050/131] NFSD: Replace READ* macros in nfsd4_decode_getattr() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ff4fdec6b8f3..6f32901f8048 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -772,7 +772,8 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu static inline __be32 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { - return nfsd4_decode_bitmap(argp, getattr->ga_bmval); + return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, + ARRAY_SIZE(getattr->ga_bmval)); } static __be32 From 5c505d128691c70991b766dd6a3faf49fa59ecfb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:01:24 -0500 Subject: [PATCH 051/131] NFSD: Replace READ* macros in nfsd4_decode_link() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6f32901f8048..59f091f9e237 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -779,16 +779,7 @@ nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *geta static __be32 nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) { - DECODE_HEAD; - - READ_BUF(4); - link->li_namelen = be32_to_cpup(p++); - READ_BUF(link->li_namelen); - SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen))) - return status; - - DECODE_TAIL; + return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } static __be32 From 5dcbfabb676b2b6d97767209cf707eb463ca232a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 11:41:55 -0500 Subject: [PATCH 052/131] NFSD: Relocate nfsd4_decode_opaque() Enable nfsd4_decode_opaque() to be used in more decoders, and replace the READ* macros in nfsd4_decode_opaque(). Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 59f091f9e237..3c67634963d0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -207,6 +207,33 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } + +/* + * NFSv4 basic data type decoders + */ + +static __be32 +nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + u32 len; + + if (xdr_stream_decode_u32(argp->xdr, &len) < 0) + return nfserr_bad_xdr; + if (len == 0 || len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, len); + if (!p) + return nfserr_bad_xdr; + o->data = svcxdr_tmpalloc(argp, len); + if (!o->data) + return nfserr_jukebox; + o->len = len; + memcpy(o->data, p, len); + + return nfs_ok; +} + static __be32 nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) { @@ -943,22 +970,6 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) return nfserr_bad_xdr; } -static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) -{ - DECODE_HEAD; - - READ_BUF(4); - o->len = be32_to_cpup(p++); - - if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) - return nfserr_bad_xdr; - - READ_BUF(o->len); - SAVEMEM(o->data, o->len); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { From 144e82694092ff80b5e64749d6822cd8947587f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:25:02 -0500 Subject: [PATCH 053/131] NFSD: Add helpers to decode a clientid4 and an NFSv4 state owner These helpers will also be used to simplify decoders in subsequent patches. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3c67634963d0..1b214b475d68 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -609,6 +609,30 @@ nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid) return nfs_ok; } +static __be32 +nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, sizeof(__be64)); + if (!p) + return nfserr_bad_xdr; + memcpy(clientid, p, sizeof(*clientid)); + return nfs_ok; +} + +static __be32 +nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, + clientid_t *clientid, struct xdr_netobj *owner) +{ + __be32 status; + + status = nfsd4_decode_clientid4(argp, clientid); + if (status) + return status; + return nfsd4_decode_opaque(argp, owner); +} + static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { DECODE_HEAD; @@ -832,12 +856,12 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); if (status) return status; - READ_BUF(8 + sizeof(clientid_t)); + READ_BUF(4); lock->lk_new_lock_seqid = be32_to_cpup(p++); - COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - lock->lk_new_owner.len = be32_to_cpup(p++); - READ_BUF(lock->lk_new_owner.len); - READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); + status = nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid, + &lock->lk_new_owner); + if (status) + return status; } else { status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); if (status) From 8918cc0d2b72db9997390626010b182c4500d749 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:16:52 -0500 Subject: [PATCH 054/131] NFSD: Add helper for decoding locker4 Refactor for clarity. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 64 +++++++++++++++++++++++++------------- include/linux/sunrpc/xdr.h | 21 +++++++++++++ 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1b214b475d68..b85ccc94bfe0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -833,6 +833,48 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } +static __be32 +nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp, + struct nfsd4_lock *lock) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid, + &lock->lk_new_owner); +} + +static __be32 +nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp, + struct nfsd4_lock *lock) +{ + __be32 status; + + status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} + +static __be32 +nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +{ + if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0) + return nfserr_bad_xdr; + if (lock->lk_is_new) + return nfsd4_decode_open_to_lock_owner4(argp, lock); + return nfsd4_decode_exist_lock_owner4(argp, lock); +} + static __be32 nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { @@ -848,27 +890,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) lock->lk_reclaim = be32_to_cpup(p++); p = xdr_decode_hyper(p, &lock->lk_offset); p = xdr_decode_hyper(p, &lock->lk_length); - lock->lk_is_new = be32_to_cpup(p++); - - if (lock->lk_is_new) { - READ_BUF(4); - lock->lk_new_open_seqid = be32_to_cpup(p++); - status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); - if (status) - return status; - READ_BUF(4); - lock->lk_new_lock_seqid = be32_to_cpup(p++); - status = nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid, - &lock->lk_new_owner); - if (status) - return status; - } else { - status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); - if (status) - return status; - READ_BUF(4); - lock->lk_old_lock_seqid = be32_to_cpup(p++); - } + status = nfsd4_decode_locker4(argp, lock); DECODE_TAIL; } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc669d95c484..9b35ce50cf2b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -550,6 +550,27 @@ static inline bool xdr_item_is_present(const __be32 *p) return *p != xdr_zero; } +/** + * xdr_stream_decode_bool - Decode a boolean + * @xdr: pointer to xdr_stream + * @ptr: pointer to a u32 in which to store the result + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = (*p != xdr_zero); + return 0; +} + /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream From 7c59deed5cd2e1cfc6cbecf06f4584ac53755f53 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:29:27 -0500 Subject: [PATCH 055/131] NFSD: Replace READ* macros in nfsd4_decode_lock() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b85ccc94bfe0..68516004b891 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -878,21 +878,17 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) static __be32 nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { - DECODE_HEAD; - - /* - * type, reclaim(boolean), offset, length, new_lock_owner(boolean) - */ - READ_BUF(28); - lock->lk_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) + return nfserr_bad_xdr; if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) - goto xdr_error; - lock->lk_reclaim = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &lock->lk_offset); - p = xdr_decode_hyper(p, &lock->lk_length); - status = nfsd4_decode_locker4(argp, lock); - - DECODE_TAIL; + return nfserr_bad_xdr; + if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_locker4(argp, lock); } static __be32 From 0a146f04aa0fa7a57aaed3913d1c2732b3853f31 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:31:44 -0500 Subject: [PATCH 056/131] NFSD: Replace READ* macros in nfsd4_decode_lockt() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 68516004b891..15c3d947a6be 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -894,20 +894,16 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) static __be32 nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) { - DECODE_HEAD; - - READ_BUF(32); - lockt->lt_type = be32_to_cpup(p++); - if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) - goto xdr_error; - p = xdr_decode_hyper(p, &lockt->lt_offset); - p = xdr_decode_hyper(p, &lockt->lt_length); - COPYMEM(&lockt->lt_clientid, 8); - lockt->lt_owner.len = be32_to_cpup(p++); - READ_BUF(lockt->lt_owner.len); - READMEM(lockt->lt_owner.data, lockt->lt_owner.len); - - DECODE_TAIL; + if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) + return nfserr_bad_xdr; + if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid, + &lockt->lt_owner); } static __be32 From ca9cf9fc27f8f722e9eb2763173ba01f6ac3dad1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:33:28 -0500 Subject: [PATCH 057/131] NFSD: Replace READ* macros in nfsd4_decode_locku() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 15c3d947a6be..d02b3ac71ccd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -909,21 +909,23 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) static __be32 nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { - DECODE_HEAD; + __be32 status; - READ_BUF(8); - locku->lu_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) + return nfserr_bad_xdr; if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) - goto xdr_error; - locku->lu_seqid = be32_to_cpup(p++); - status = nfsd4_decode_stateid(argp, &locku->lu_stateid); + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &locku->lu_stateid); if (status) return status; - READ_BUF(16); - p = xdr_decode_hyper(p, &locku->lu_offset); - p = xdr_decode_hyper(p, &locku->lu_length); + if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From 3d5877e8e03f60d7cc804d7b230ff9c00c9c07bd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:02:40 -0500 Subject: [PATCH 058/131] NFSD: Replace READ* macros in nfsd4_decode_lookup() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d02b3ac71ccd..893db985c927 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -931,16 +931,7 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) static __be32 nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) { - DECODE_HEAD; - - READ_BUF(4); - lookup->lo_len = be32_to_cpup(p++); - READ_BUF(lookup->lo_len); - SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len))) - return status; - - DECODE_TAIL; + return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) From 796dd1c6b680959ac968b52aa507911b288b1749 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:34:01 -0500 Subject: [PATCH 059/131] NFSD: Add helper to decode NFSv4 verifiers This helper will be used to simplify decoders in subsequent patches. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 893db985c927..f858ab45ed37 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -284,6 +284,18 @@ nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv) return nfs_ok; } +static __be32 +nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_bad_xdr; + memcpy(verf->data, p, sizeof(verf->data)); + return nfs_ok; +} + static __be32 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { @@ -1047,14 +1059,16 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) goto out; break; case NFS4_CREATE_EXCLUSIVE: - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; break; case NFS4_CREATE_EXCLUSIVE4_1: if (argp->minorversion < 1) goto xdr_error; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; status = nfsd4_decode_fattr4(argp, open->op_bmval, ARRAY_SIZE(open->op_bmval), &open->op_iattr, &open->op_acl, From bf33bab3c4182cdd795983f14de5606e82fab377 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:37:42 -0500 Subject: [PATCH 060/131] NFSD: Add helper to decode OPEN's createhow4 argument Refactor for clarity. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 78 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f858ab45ed37..22e6542499a0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -946,6 +946,48 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } +static __be32 +nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0) + return nfserr_bad_xdr; + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); + if (status) + return status; + break; + case NFS4_CREATE_EXCLUSIVE: + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + return nfserr_bad_xdr; + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { __be32 *p; @@ -1046,39 +1088,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: - READ_BUF(4); - open->op_createmode = be32_to_cpup(p++); - switch (open->op_createmode) { - case NFS4_CREATE_UNCHECKED: - case NFS4_CREATE_GUARDED: - status = nfsd4_decode_fattr4(argp, open->op_bmval, - ARRAY_SIZE(open->op_bmval), - &open->op_iattr, &open->op_acl, - &open->op_label, &open->op_umask); - if (status) - goto out; - break; - case NFS4_CREATE_EXCLUSIVE: - status = nfsd4_decode_verifier4(argp, &open->op_verf); - if (status) - return status; - break; - case NFS4_CREATE_EXCLUSIVE4_1: - if (argp->minorversion < 1) - goto xdr_error; - status = nfsd4_decode_verifier4(argp, &open->op_verf); - if (status) - return status; - status = nfsd4_decode_fattr4(argp, open->op_bmval, - ARRAY_SIZE(open->op_bmval), - &open->op_iattr, &open->op_acl, - &open->op_label, &open->op_umask); - if (status) - goto out; - break; - default: - goto xdr_error; - } + status = nfsd4_decode_createhow4(argp, open); + if (status) + return status; break; default: goto xdr_error; From e6ec04b27bfb4869c0e35fbcf24333d379f101d5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:41:21 -0500 Subject: [PATCH 061/131] NFSD: Add helper to decode OPEN's openflag4 argument Refactor for clarity. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 22e6542499a0..59d0290b242e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -988,6 +988,28 @@ nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open return nfs_ok; } +static __be32 +nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0) + return nfserr_bad_xdr; + switch (open->op_create) { + case NFS4_OPEN_NOCREATE: + break; + case NFS4_OPEN_CREATE: + status = nfsd4_decode_createhow4(argp, open); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { __be32 *p; @@ -1082,19 +1104,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) status = nfsd4_decode_opaque(argp, &open->op_owner); if (status) goto xdr_error; - READ_BUF(4); - open->op_create = be32_to_cpup(p++); - switch (open->op_create) { - case NFS4_OPEN_NOCREATE: - break; - case NFS4_OPEN_CREATE: - status = nfsd4_decode_createhow4(argp, open); - if (status) - return status; - break; - default: - goto xdr_error; - } + status = nfsd4_decode_openflag4(argp, open); + if (status) + return status; /* open_claim */ READ_BUF(4); From 9aa62f5199749b274454b6d7d914c9b2a5e77031 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:54:48 -0500 Subject: [PATCH 062/131] NFSD: Replace READ* macros in nfsd4_decode_share_access() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 59d0290b242e..6bfb549721b1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1012,11 +1012,10 @@ nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { - __be32 *p; u32 w; - READ_BUF(4); - w = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &w) < 0) + return nfserr_bad_xdr; *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -1059,7 +1058,6 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): return nfs_ok; } -xdr_error: return nfserr_bad_xdr; } From b07bebd9eb9842e2d0dea87efeb92884556e55b0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:56:17 -0500 Subject: [PATCH 063/131] NFSD: Replace READ* macros in nfsd4_decode_share_deny() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6bfb549721b1..fe2c71ad595e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1063,16 +1063,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) { - __be32 *p; - - READ_BUF(4); - *x = be32_to_cpup(p++); - /* Note: unlinke access bits, deny bits may be zero. */ + if (xdr_stream_decode_u32(argp->xdr, x) < 0) + return nfserr_bad_xdr; + /* Note: unlike access bits, deny bits may be zero. */ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; + return nfs_ok; -xdr_error: - return nfserr_bad_xdr; } static __be32 From 1708e50b0145f393acbec9e319bdf0e33f765d25 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:45:04 -0500 Subject: [PATCH 064/131] NFSD: Add helper to decode OPEN's open_claim4 argument Refactor for clarity. Note that op_fname is the only instance of an NFSv4 filename stored in a struct xdr_netobj. Convert it to a u32/char * pair so that the new nfsd4_decode_filename() helper can be used. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 8 ++-- fs/nfsd/nfs4xdr.c | 95 ++++++++++++++++++++++++---------------------- fs/nfsd/xdr4.h | 3 +- 3 files changed, 56 insertions(+), 50 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index df2d6f70c8d4..56d074a6cb31 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -257,8 +257,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * in NFSv4 as in v3 except EXCLUSIVE4_1. */ current->fs->umask = open->op_umask; - status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, - open->op_fname.len, &open->op_iattr, + status = do_nfsd_create(rqstp, current_fh, open->op_fname, + open->op_fnamelen, &open->op_iattr, *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); @@ -283,7 +283,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * a chance to an acquire a delegation if appropriate. */ status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, *resfh); + open->op_fname, open->op_fnamelen, *resfh); if (status) goto out; status = nfsd_check_obj_isreg(*resfh); @@ -360,7 +360,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool reclaim = false; dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", - (int)open->op_fname.len, open->op_fname.data, + (int)open->op_fnamelen, open->op_fname, open->op_openowner); /* This check required by spec. */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fe2c71ad595e..c15d5f8ef191 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1072,6 +1072,55 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) return nfs_ok; } +static __be32 +nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, + struct nfsd4_open *open) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0) + return nfserr_bad_xdr; + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + status = nfsd4_decode_component4(argp, &open->op_fname, + &open->op_fnamelen); + if (status) + return status; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0) + return nfserr_bad_xdr; + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); + if (status) + return status; + status = nfsd4_decode_component4(argp, &open->op_fname, + &open->op_fnamelen); + if (status) + return status; + break; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + if (argp->minorversion < 1) + return nfserr_bad_xdr; + /* void */ + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + if (argp->minorversion < 1) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { @@ -1102,51 +1151,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) status = nfsd4_decode_openflag4(argp, open); if (status) return status; - - /* open_claim */ - READ_BUF(4); - open->op_claim_type = be32_to_cpup(p++); - switch (open->op_claim_type) { - case NFS4_OPEN_CLAIM_NULL: - case NFS4_OPEN_CLAIM_DELEGATE_PREV: - READ_BUF(4); - open->op_fname.len = be32_to_cpup(p++); - READ_BUF(open->op_fname.len); - SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len))) - return status; - break; - case NFS4_OPEN_CLAIM_PREVIOUS: - READ_BUF(4); - open->op_delegate_type = be32_to_cpup(p++); - break; - case NFS4_OPEN_CLAIM_DELEGATE_CUR: - status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); - if (status) - return status; - READ_BUF(4); - open->op_fname.len = be32_to_cpup(p++); - READ_BUF(open->op_fname.len); - SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len))) - return status; - break; - case NFS4_OPEN_CLAIM_FH: - case NFS4_OPEN_CLAIM_DELEG_PREV_FH: - if (argp->minorversion < 1) - goto xdr_error; - /* void */ - break; - case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - if (argp->minorversion < 1) - goto xdr_error; - status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); - if (status) - return status; - break; - default: - goto xdr_error; - } + status = nfsd4_decode_open_claim4(argp, open); DECODE_TAIL; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0eb13bd603ea..6245004a9993 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -252,7 +252,8 @@ struct nfsd4_listxattrs { struct nfsd4_open { u32 op_claim_type; /* request */ - struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ + u32 op_fnamelen; + char * op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ stateid_t op_delegate_stateid; /* request - response */ u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ From 61e5e0b3ec713d1365008c8af3fe5fdd262e2a60 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Nov 2020 12:04:06 -0500 Subject: [PATCH 065/131] NFSD: Replace READ* macros in nfsd4_decode_open() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c15d5f8ef191..ece7af152fb1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1124,7 +1124,7 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { - DECODE_HEAD; + __be32 status; u32 dummy; memset(open->op_bmval, 0, sizeof(open->op_bmval)); @@ -1132,28 +1132,24 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) open->op_openowner = NULL; open->op_xdr_error = 0; - /* seqid, share_access, share_deny, clientid, ownerlen */ - READ_BUF(4); - open->op_seqid = be32_to_cpup(p++); - /* decode, yet ignore deleg_when until supported */ + if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0) + return nfserr_bad_xdr; + /* deleg_want is ignored */ status = nfsd4_decode_share_access(argp, &open->op_share_access, &open->op_deleg_want, &dummy); if (status) - goto xdr_error; + return status; status = nfsd4_decode_share_deny(argp, &open->op_share_deny); if (status) - goto xdr_error; - READ_BUF(sizeof(clientid_t)); - COPYMEM(&open->op_clientid, sizeof(clientid_t)); - status = nfsd4_decode_opaque(argp, &open->op_owner); + return status; + status = nfsd4_decode_state_owner4(argp, &open->op_clientid, + &open->op_owner); if (status) - goto xdr_error; + return status; status = nfsd4_decode_openflag4(argp, open); if (status) return status; - status = nfsd4_decode_open_claim4(argp, open); - - DECODE_TAIL; + return nfsd4_decode_open_claim4(argp, open); } static __be32 From 06bee693a1f1cb774b91000f05a6e183c257d8e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:18:57 -0500 Subject: [PATCH 066/131] NFSD: Replace READ* macros in nfsd4_decode_open_confirm() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ece7af152fb1..4ba8fa8038b8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1155,18 +1155,18 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); + status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid); if (status) return status; - READ_BUF(4); - open_conf->oc_seqid = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From dca71651f097ea608945d7a66bf62761a630de9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:21:01 -0500 Subject: [PATCH 067/131] NFSD: Replace READ* macros in nfsd4_decode_open_downgrade() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4ba8fa8038b8..ba141280d59f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1172,21 +1172,19 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con static __be32 nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) { - DECODE_HEAD; - - status = nfsd4_decode_stateid(argp, &open_down->od_stateid); + __be32 status; + + status = nfsd4_decode_stateid4(argp, &open_down->od_stateid); if (status) return status; - READ_BUF(4); - open_down->od_seqid = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0) + return nfserr_bad_xdr; + /* deleg_want is ignored */ status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) return status; - status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); - if (status) - return status; - DECODE_TAIL; + return nfsd4_decode_share_deny(argp, &open_down->od_share_deny); } static __be32 From a73bed98413b1d9eb4466f776a56d2fde8b3b2c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:23:02 -0500 Subject: [PATCH 068/131] NFSD: Replace READ* macros in nfsd4_decode_putfh() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ba141280d59f..f28bebd17310 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1190,16 +1190,21 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d static __be32 nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) { - DECODE_HEAD; + __be32 *p; - READ_BUF(4); - putfh->pf_fhlen = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) + return nfserr_bad_xdr; if (putfh->pf_fhlen > NFS4_FHSIZE) - goto xdr_error; - READ_BUF(putfh->pf_fhlen); - SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen); + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); + if (!p) + return nfserr_bad_xdr; + putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen); + if (!putfh->pf_fhval) + return nfserr_jukebox; + memcpy(putfh->pf_fhval, p, putfh->pf_fhlen); - DECODE_TAIL; + return nfs_ok; } static __be32 From 3909c3bc604688503e31ddceb429dc156c4720c1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:28:24 -0500 Subject: [PATCH 069/131] NFSD: Replace READ* macros in nfsd4_decode_read() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f28bebd17310..8982f2717acf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1218,16 +1218,17 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &read->rd_stateid); + status = nfsd4_decode_stateid4(argp, &read->rd_stateid); if (status) return status; - READ_BUF(12); - p = xdr_decode_hyper(p, &read->rd_offset); - read->rd_length = be32_to_cpup(p++); + if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From 0dfaf2a371436860ace6af889e6cd8410ee63164 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:30:59 -0500 Subject: [PATCH 070/131] NFSD: Replace READ* macros in nfsd4_decode_readdir() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8982f2717acf..3c0100a6f35f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1234,17 +1234,22 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) static __be32 nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) { - DECODE_HEAD; + __be32 status; - READ_BUF(24); - p = xdr_decode_hyper(p, &readdir->rd_cookie); - COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - readdir->rd_dircount = be32_to_cpup(p++); - readdir->rd_maxcount = be32_to_cpup(p++); - if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) - goto out; + if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_verifier4(argp, &readdir->rd_verf); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval, + ARRAY_SIZE(readdir->rd_bmval)) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From b7f5fbf219aecda98e32de305551e445f9438899 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:04:36 -0500 Subject: [PATCH 071/131] NFSD: Replace READ* macros in nfsd4_decode_remove() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3c0100a6f35f..71409a70ef08 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1255,16 +1255,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read static __be32 nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) { - DECODE_HEAD; - - READ_BUF(4); - remove->rm_namelen = be32_to_cpup(p++); - READ_BUF(remove->rm_namelen); - SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen))) - return status; - - DECODE_TAIL; + return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 From ba881a0a5342b3aaf83958901ebe3fe752eaab46 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:05:58 -0500 Subject: [PATCH 072/131] NFSD: Replace READ* macros in nfsd4_decode_rename() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 71409a70ef08..08b909bca6b5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1261,22 +1261,12 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove static __be32 nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) { - DECODE_HEAD; + __be32 status; - READ_BUF(4); - rename->rn_snamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_snamelen); - SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ_BUF(4); - rename->rn_tnamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_tnamelen); - SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) + status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen); + if (status) return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) - return status; - - DECODE_TAIL; + return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen); } static __be32 From d12f90458dc8c11734ba44ec88f109bf8de86ff0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:08:50 -0500 Subject: [PATCH 073/131] NFSD: Replace READ* macros in nfsd4_decode_renew() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 08b909bca6b5..91a15ecd2ace 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1272,15 +1272,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename static __be32 nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { - DECODE_HEAD; - - if (argp->minorversion >= 1) - return nfserr_notsupp; - - READ_BUF(sizeof(clientid_t)); - COPYMEM(clientid, sizeof(clientid_t)); - - DECODE_TAIL; + return nfsd4_decode_clientid4(argp, clientid); } static __be32 From d0abdae5191a916d767164f6fc6c0e2e814a20a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:09:42 -0500 Subject: [PATCH 074/131] NFSD: Replace READ* macros in nfsd4_decode_secinfo() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 91a15ecd2ace..76abd7af9107 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1279,16 +1279,7 @@ static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, struct nfsd4_secinfo *secinfo) { - DECODE_HEAD; - - READ_BUF(4); - secinfo->si_namelen = be32_to_cpup(p++); - READ_BUF(secinfo->si_namelen); - SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen); - if (status) - return status; - DECODE_TAIL; + return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 From 44592fe9479d8d4b88594365ab825f7b07afdf7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Nov 2020 14:14:59 -0500 Subject: [PATCH 075/131] NFSD: Replace READ* macros in nfsd4_decode_setattr() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 76abd7af9107..a7ee5c79ff94 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1298,7 +1298,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta { __be32 status; - status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid); if (status) return status; return nfsd4_decode_fattr4(argp, setattr->sa_bmval, From 92fa6c08c251d52d0d7b46066ecf87b96a0c4b8f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:35:02 -0500 Subject: [PATCH 076/131] NFSD: Replace READ* macros in nfsd4_decode_setclientid() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a7ee5c79ff94..2190f50f8ba2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1310,31 +1310,46 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta static __be32 nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) { - DECODE_HEAD; + __be32 *p, status; if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); - + status = nfsd4_decode_verifier4(argp, &setclientid->se_verf); + if (status) + return status; status = nfsd4_decode_opaque(argp, &setclientid->se_name); if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0) return nfserr_bad_xdr; - READ_BUF(8); - setclientid->se_callback_prog = be32_to_cpup(p++); - setclientid->se_callback_netid_len = be32_to_cpup(p++); - READ_BUF(setclientid->se_callback_netid_len); - SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ_BUF(4); - setclientid->se_callback_addr_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); + if (!p) + return nfserr_bad_xdr; + setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_netid_len); + if (!setclientid->se_callback_netid_val) + return nfserr_jukebox; + memcpy(setclientid->se_callback_netid_val, p, + setclientid->se_callback_netid_len); - READ_BUF(setclientid->se_callback_addr_len); - SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ_BUF(4); - setclientid->se_callback_ident = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); + if (!p) + return nfserr_bad_xdr; + setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_addr_len); + if (!setclientid->se_callback_addr_val) + return nfserr_jukebox; + memcpy(setclientid->se_callback_addr_val, p, + setclientid->se_callback_addr_len); + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From d1ca55149d67e5896f89a30053f5d83c002ac10e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:12:33 -0500 Subject: [PATCH 077/131] NFSD: Replace READ* macros in nfsd4_decode_setclientid_confirm() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2190f50f8ba2..5f7917014778 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1355,16 +1355,15 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient static __be32 nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(8 + NFS4_VERIFIER_SIZE); - COPYMEM(&scd_c->sc_clientid, 8); - COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); - - DECODE_TAIL; + status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid); + if (status) + return status; + return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm); } /* Also used for NVERIFY */ From 67cd453eeda86be90f83a0f4798f33832cf2d98c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:40:32 -0500 Subject: [PATCH 078/131] NFSD: Replace READ* macros in nfsd4_decode_verify() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5f7917014778..4ec08eb48e33 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1370,20 +1370,27 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { - DECODE_HEAD; + __be32 *p, status; - if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) - goto out; + status = nfsd4_decode_bitmap4(argp, verify->ve_bmval, + ARRAY_SIZE(verify->ve_bmval)); + if (status) + return status; /* For convenience's sake, we compare raw xdr'd attributes in * nfsd4_proc_verify */ - READ_BUF(4); - verify->ve_attrlen = be32_to_cpup(p++); - READ_BUF(verify->ve_attrlen); - SAVEMEM(verify->ve_attrval, verify->ve_attrlen); + if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); + if (!p) + return nfserr_bad_xdr; + verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen); + if (!verify->ve_attrval) + return nfserr_jukebox; + memcpy(verify->ve_attrval, p, verify->ve_attrlen); - DECODE_TAIL; + return nfs_ok; } static __be32 From 244e2befcba80f42c65293b6c56282bb78f9f417 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:44:28 -0500 Subject: [PATCH 079/131] NFSD: Replace READ* macros in nfsd4_decode_write() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4ec08eb48e33..8e960cdc351d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1396,22 +1396,23 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &write->wr_stateid); + status = nfsd4_decode_stateid4(argp, &write->wr_stateid); if (status) return status; - READ_BUF(16); - p = xdr_decode_hyper(p, &write->wr_offset); - write->wr_stable_how = be32_to_cpup(p++); + if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0) + return nfserr_bad_xdr; if (write->wr_stable_how > NFS_FILE_SYNC) - goto xdr_error; - write->wr_buflen = be32_to_cpup(p++); - + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0) + return nfserr_bad_xdr; if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) - goto xdr_error; + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 From a4a80c15ca4dd998ab5cbe87bd856c626a318a80 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 13:42:25 -0500 Subject: [PATCH 080/131] NFSD: Replace READ* macros in nfsd4_decode_release_lockowner() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8e960cdc351d..667f6d9314a7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1418,20 +1418,20 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) static __be32 nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(12); - COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - rlockowner->rl_owner.len = be32_to_cpup(p++); - READ_BUF(rlockowner->rl_owner.len); - READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid, + &rlockowner->rl_owner); + if (status) + return status; if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) return nfserr_inval; - DECODE_TAIL; + + return nfs_ok; } static __be32 From 1a99440807bfc66597aaa2e0f0213c319b023e34 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:09:34 -0500 Subject: [PATCH 081/131] NFSD: Replace READ* macros in nfsd4_decode_cb_sec() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 165 ++++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 58 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 667f6d9314a7..61f9186c8be4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -212,6 +212,25 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) * NFSv4 basic data type decoders */ +/* + * This helper handles variable-length opaques which belong to protocol + * elements that this implementation does not support. + */ +static __be32 +nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen) +{ + u32 len; + + if (xdr_stream_decode_u32(argp->xdr, &len) < 0) + return nfserr_bad_xdr; + if (maxlen && len > maxlen) + return nfserr_bad_xdr; + if (!xdr_inline_decode(argp->xdr, len)) + return nfserr_bad_xdr; + + return nfs_ok; +} + static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) { @@ -645,87 +664,117 @@ nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, return nfsd4_decode_opaque(argp, owner); } -static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +/* Defined in Appendix A of RFC 5531 */ +static __be32 +nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp, + struct nfsd4_cb_sec *cbs) { - DECODE_HEAD; - struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); - u32 dummy, uid, gid; - char *machine_name; - int i; - int nr_secflavs; + u32 stamp, gidcount, uid, gid; + __be32 *p, status; + + if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0) + return nfserr_bad_xdr; + /* machine name */ + status = nfsd4_decode_ignored_string(argp, 255); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &uid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0) + return nfserr_bad_xdr; + if (gidcount > 16) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, gidcount << 2); + if (!p) + return nfserr_bad_xdr; + if (cbs->flavor == (u32)(-1)) { + struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); + + kuid_t kuid = make_kuid(userns, uid); + kgid_t kgid = make_kgid(userns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n"); + } + } + + return nfs_ok; +} + +static __be32 +nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp, + struct nfsd4_cb_sec *cbs) +{ + __be32 status; + u32 service; + + dprintk("RPC_AUTH_GSS callback secflavor not supported!\n"); + + if (xdr_stream_decode_u32(argp->xdr, &service) < 0) + return nfserr_bad_xdr; + if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY) + return nfserr_bad_xdr; + /* gcbp_handle_from_server */ + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + /* gcbp_handle_from_client */ + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + + return nfs_ok; +} + +/* a counted array of callback_sec_parms4 items */ +static __be32 +nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + u32 i, secflavor, nr_secflavs; + __be32 status; /* callback_sec_params4 */ - READ_BUF(4); - nr_secflavs = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &nr_secflavs) < 0) + return nfserr_bad_xdr; if (nr_secflavs) cbs->flavor = (u32)(-1); else /* Is this legal? Be generous, take it to mean AUTH_NONE: */ cbs->flavor = 0; + for (i = 0; i < nr_secflavs; ++i) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - switch (dummy) { + if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0) + return nfserr_bad_xdr; + switch (secflavor) { case RPC_AUTH_NULL: - /* Nothing to read */ + /* void */ if (cbs->flavor == (u32)(-1)) cbs->flavor = RPC_AUTH_NULL; break; case RPC_AUTH_UNIX: - READ_BUF(8); - /* stamp */ - dummy = be32_to_cpup(p++); - - /* machine name */ - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - SAVEMEM(machine_name, dummy); - - /* uid, gid */ - READ_BUF(8); - uid = be32_to_cpup(p++); - gid = be32_to_cpup(p++); - - /* more gids */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - if (cbs->flavor == (u32)(-1)) { - kuid_t kuid = make_kuid(userns, uid); - kgid_t kgid = make_kgid(userns, gid); - if (uid_valid(kuid) && gid_valid(kgid)) { - cbs->uid = kuid; - cbs->gid = kgid; - cbs->flavor = RPC_AUTH_UNIX; - } else { - dprintk("RPC_AUTH_UNIX with invalid" - "uid or gid ignoring!\n"); - } - } + status = nfsd4_decode_authsys_parms(argp, cbs); + if (status) + return status; break; case RPC_AUTH_GSS: - dprintk("RPC_AUTH_GSS callback secflavor " - "not supported!\n"); - READ_BUF(8); - /* gcbp_service */ - dummy = be32_to_cpup(p++); - /* gcbp_handle_from_server */ - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - /* gcbp_handle_from_client */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); + status = nfsd4_decode_gss_cb_handles4(argp, cbs); + if (status) + return status; break; default: - dprintk("Illegal callback secflavor\n"); return nfserr_inval; } } - DECODE_TAIL; + + return nfs_ok; } + /* * NFSv4 operation argument decoders */ From 0f81d96098f8eb707afe2f8d5c3fe0f9316ef5ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:14:35 -0500 Subject: [PATCH 082/131] NFSD: Replace READ* macros in nfsd4_decode_backchannel_ctl() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 61f9186c8be4..b1d32e0744e2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -788,17 +788,6 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) -{ - DECODE_HEAD; - - READ_BUF(4); - bc->bc_cb_program = be32_to_cpup(p++); - nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; @@ -1483,6 +1472,13 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); +} + static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) From 571e0451c4de0a545960ffaea16d969931afc563 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:16:23 -0500 Subject: [PATCH 083/131] NFSD: Replace READ* macros in nfsd4_decode_bind_conn_to_session() A dedicated sessionid4 decoder is introduced that will be used by other operation decoders in subsequent patches. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b1d32e0744e2..a35075e7900a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -664,6 +664,19 @@ nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, return nfsd4_decode_opaque(argp, owner); } +static __be32 +nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp, + struct nfs4_sessionid *sessionid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN); + if (!p) + return nfserr_bad_xdr; + memcpy(sessionid->data, p, sizeof(sessionid->data)); + return nfs_ok; +} + /* Defined in Appendix A of RFC 5531 */ static __be32 nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp, @@ -788,18 +801,6 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, return nfs_ok; } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) -{ - DECODE_HEAD; - - READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); - COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - bcts->dir = be32_to_cpup(p++); - /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker - * could help us figure out we should be using it. */ - DECODE_TAIL; -} - static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { @@ -1479,6 +1480,22 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + u32 use_conn_in_rdma_mode; + __be32 status; + + status = nfsd4_decode_sessionid4(argp, &bcts->sessionid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} + static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) From 2548aa784d760567c2a77cbd8b7c55b211167c37 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 11:13:00 -0500 Subject: [PATCH 084/131] NFSD: Add a separate decoder to handle state_protect_ops Refactor for clarity and de-duplication of code. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 66 +++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a35075e7900a..07716e747ea2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -315,32 +315,6 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) return nfs_ok; } -static __be32 -nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) -{ - u32 bmlen; - DECODE_HEAD; - - bmval[0] = 0; - bmval[1] = 0; - bmval[2] = 0; - - READ_BUF(4); - bmlen = be32_to_cpup(p++); - if (bmlen > 1000) - goto xdr_error; - - READ_BUF(bmlen << 2); - if (bmlen > 0) - bmval[0] = be32_to_cpup(p++); - if (bmlen > 1) - bmval[1] = be32_to_cpup(p++); - if (bmlen > 2) - bmval[2] = be32_to_cpup(p++); - - DECODE_TAIL; -} - /** * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4 * @argp: NFSv4 compound argument structure @@ -1496,6 +1470,24 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + __be32 status; + + status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce, + ARRAY_SIZE(exid->spo_must_enforce)); + if (status) + return nfserr_bad_xdr; + status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow, + ARRAY_SIZE(exid->spo_must_allow)); + if (status) + return nfserr_bad_xdr; + + return nfs_ok; +} + static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) @@ -1520,27 +1512,15 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, case SP4_NONE: break; case SP4_MACH_CRED: - /* spo_must_enforce */ - status = nfsd4_decode_bitmap(argp, - exid->spo_must_enforce); + status = nfsd4_decode_state_protect_ops(argp, exid); if (status) - goto out; - /* spo_must_allow */ - status = nfsd4_decode_bitmap(argp, exid->spo_must_allow); - if (status) - goto out; + return status; break; case SP4_SSV: /* ssp_ops */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - p += dummy; - - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - p += dummy; + status = nfsd4_decode_state_protect_ops(argp, exid); + if (status) + return status; /* ssp_hash_algs<> */ READ_BUF(4); From 547bfeb4cd8d491aabbd656d5a6f410cb4249b4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 11:17:50 -0500 Subject: [PATCH 085/131] NFSD: Add a separate decoder for ssv_sp_parms Refactor for clarity. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 70 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 07716e747ea2..35b2668b572e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1488,12 +1488,54 @@ nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp, return nfs_ok; } +/* + * This implementation currently does not support SP4_SSV. + * This decoder simply skips over these arguments. + */ +static noinline __be32 +nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + u32 count, window, num_gss_handles; + __be32 status; + + /* ssp_ops */ + status = nfsd4_decode_state_protect_ops(argp, exid); + if (status) + return status; + + /* ssp_hash_algs<> */ + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + while (count--) { + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + } + + /* ssp_encr_algs<> */ + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + while (count--) { + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + } + + if (xdr_stream_decode_u32(argp->xdr, &window) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} + static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) { - int dummy, tmp; DECODE_HEAD; + int dummy; READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); @@ -1517,33 +1559,9 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return status; break; case SP4_SSV: - /* ssp_ops */ - status = nfsd4_decode_state_protect_ops(argp, exid); + status = nfsd4_decode_ssv_sp_parms(argp, exid); if (status) return status; - - /* ssp_hash_algs<> */ - READ_BUF(4); - tmp = be32_to_cpup(p++); - while (tmp--) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - } - - /* ssp_encr_algs<> */ - READ_BUF(4); - tmp = be32_to_cpup(p++); - while (tmp--) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - } - - /* ignore ssp_window and ssp_num_gss_handles: */ - READ_BUF(8); break; default: goto xdr_error; From 523ec6ed6fb80fd1537d748a06bffd060a8b3235 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Nov 2020 15:19:12 -0500 Subject: [PATCH 086/131] NFSD: Add a helper to decode state_protect4_a Refactor for clarity. Also, remove a stale comment. Commit ed94164398c9 ("nfsd: implement machine credential support for some operations") added support for SP4_MACH_CRED, so state_protect_a is no longer completely ignored. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfs4xdr.c | 44 +++++++++++++++++++++++++++----------------- fs/nfsd/xdr4.h | 2 +- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d7f27ed6b794..be6dcc4c2ab0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3066,7 +3066,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - "ip_addr=%s flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %u\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 35b2668b572e..08909c7975dc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1531,25 +1531,13 @@ nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) +nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) { - DECODE_HEAD; - int dummy; + __be32 status; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); - - status = nfsd4_decode_opaque(argp, &exid->clname); - if (status) + if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0) return nfserr_bad_xdr; - - READ_BUF(4); - exid->flags = be32_to_cpup(p++); - - /* Ignore state_protect4_a */ - READ_BUF(4); - exid->spa_how = be32_to_cpup(p++); switch (exid->spa_how) { case SP4_NONE: break; @@ -1564,9 +1552,31 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return status; break; default: - goto xdr_error; + return nfserr_bad_xdr; } + return nfs_ok; +} + +static __be32 +nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + DECODE_HEAD; + int dummy; + + status = nfsd4_decode_verifier4(argp, &exid->verifier); + if (status) + return status; + status = nfsd4_decode_opaque(argp, &exid->clname); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_state_protect4_a(argp, exid); + if (status) + return status; + READ_BUF(4); /* nfs_impl_id4 array length */ dummy = be32_to_cpup(p++); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 6245004a9993..232529bc1b79 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -433,7 +433,7 @@ struct nfsd4_exchange_id { u32 flags; clientid_t clientid; u32 seqid; - int spa_how; + u32 spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; struct xdr_netobj nii_domain; From 10ff84228197f47401833495ba19a50131323b4a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 15:21:55 -0500 Subject: [PATCH 087/131] NFSD: Add a helper to decode nfs_impl_id4 Refactor for clarity. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 63 ++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 08909c7975dc..867896060878 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1558,12 +1558,47 @@ nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + __be32 status; + u32 count; + + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + switch (count) { + case 0: + break; + case 1: + /* Note that RFC 8881 places no length limit on + * nii_domain, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes */ + status = nfsd4_decode_opaque(argp, &exid->nii_domain); + if (status) + return status; + /* Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes */ + status = nfsd4_decode_opaque(argp, &exid->nii_name); + if (status) + return status; + status = nfsd4_decode_nfstime4(argp, &exid->nii_time); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, struct nfsd4_exchange_id *exid) { - DECODE_HEAD; - int dummy; + __be32 status; status = nfsd4_decode_verifier4(argp, &exid->verifier); if (status) @@ -1576,29 +1611,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, status = nfsd4_decode_state_protect4_a(argp, exid); if (status) return status; - - READ_BUF(4); /* nfs_impl_id4 array length */ - dummy = be32_to_cpup(p++); - - if (dummy > 1) - goto xdr_error; - - if (dummy == 1) { - status = nfsd4_decode_opaque(argp, &exid->nii_domain); - if (status) - goto xdr_error; - - /* nii_name */ - status = nfsd4_decode_opaque(argp, &exid->nii_name); - if (status) - goto xdr_error; - - /* nii_date */ - status = nfsd4_decode_time(argp, &exid->nii_time); - if (status) - goto xdr_error; - } - DECODE_TAIL; + return nfsd4_decode_nfs_impl_id4(argp, exid); } static __be32 From 3a3f1fbacb0960b628e5a9f07c78287312f7a99d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 15:35:05 -0500 Subject: [PATCH 088/131] NFSD: Add a helper to decode channel_attrs4 De-duplicate some code. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 71 +++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 867896060878..75e90345552e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1614,6 +1614,38 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return nfsd4_decode_nfs_impl_id4(argp, exid); } +static __be32 +nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, + struct nfsd4_channel_attrs *ca) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 7); + if (!p) + return nfserr_bad_xdr; + + /* headerpadsz is ignored */ + p++; + ca->maxreq_sz = be32_to_cpup(p++); + ca->maxresp_sz = be32_to_cpup(p++); + ca->maxresp_cached = be32_to_cpup(p++); + ca->maxops = be32_to_cpup(p++); + ca->maxreqs = be32_to_cpup(p++); + ca->nr_rdma_attrs = be32_to_cpup(p); + switch (ca->nr_rdma_attrs) { + case 0: + break; + case 1: + if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0) + return nfserr_bad_xdr; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) @@ -1625,39 +1657,12 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, sess->seqid = be32_to_cpup(p++); sess->flags = be32_to_cpup(p++); - /* Fore channel attrs */ - READ_BUF(28); - p++; /* headerpadsz is always 0 */ - sess->fore_channel.maxreq_sz = be32_to_cpup(p++); - sess->fore_channel.maxresp_sz = be32_to_cpup(p++); - sess->fore_channel.maxresp_cached = be32_to_cpup(p++); - sess->fore_channel.maxops = be32_to_cpup(p++); - sess->fore_channel.maxreqs = be32_to_cpup(p++); - sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); - if (sess->fore_channel.nr_rdma_attrs == 1) { - READ_BUF(4); - sess->fore_channel.rdma_attrs = be32_to_cpup(p++); - } else if (sess->fore_channel.nr_rdma_attrs > 1) { - dprintk("Too many fore channel attr bitmaps!\n"); - goto xdr_error; - } - - /* Back channel attrs */ - READ_BUF(28); - p++; /* headerpadsz is always 0 */ - sess->back_channel.maxreq_sz = be32_to_cpup(p++); - sess->back_channel.maxresp_sz = be32_to_cpup(p++); - sess->back_channel.maxresp_cached = be32_to_cpup(p++); - sess->back_channel.maxops = be32_to_cpup(p++); - sess->back_channel.maxreqs = be32_to_cpup(p++); - sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); - if (sess->back_channel.nr_rdma_attrs == 1) { - READ_BUF(4); - sess->back_channel.rdma_attrs = be32_to_cpup(p++); - } else if (sess->back_channel.nr_rdma_attrs > 1) { - dprintk("Too many back channel attr bitmaps!\n"); - goto xdr_error; - } + status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel); + if (status) + return status; + status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel); + if (status) + return status; READ_BUF(4); sess->callback_prog = be32_to_cpup(p++); From 81243e3fe37ed547fc4ed8aab1cec2865540bb18 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:52:44 -0500 Subject: [PATCH 089/131] NFSD: Replace READ* macros in nfsd4_decode_create_session() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 75e90345552e..7b38ecd1541b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1650,24 +1650,28 @@ static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) { - DECODE_HEAD; - - READ_BUF(16); - COPYMEM(&sess->clientid, 8); - sess->seqid = be32_to_cpup(p++); - sess->flags = be32_to_cpup(p++); + __be32 status; + status = nfsd4_decode_clientid4(argp, &sess->clientid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0) + return nfserr_bad_xdr; status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel); if (status) return status; status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_cb_sec(argp, &sess->cb_sec); if (status) return status; - READ_BUF(4); - sess->callback_prog = be32_to_cpup(p++); - nfsd4_decode_cb_sec(argp, &sess->cb_sec); - DECODE_TAIL; + return nfs_ok; } static __be32 From 94e254af1f873b4b551db4c4549294f2c4d385ef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 13:50:55 -0500 Subject: [PATCH 090/131] NFSD: Replace READ* macros in nfsd4_decode_destroy_session() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7b38ecd1541b..f578bdb9fb8d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1678,11 +1678,7 @@ static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_session *destroy_session) { - DECODE_HEAD; - READ_BUF(NFS4_MAX_SESSIONID_LEN); - COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); - - DECODE_TAIL; + return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 From aec387d5909304810d899f7d90ae57df33f3a75c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Nov 2020 13:38:27 -0500 Subject: [PATCH 091/131] NFSD: Replace READ* macros in nfsd4_decode_free_stateid() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f578bdb9fb8d..205e07d2b9dd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1685,13 +1685,7 @@ static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_free_stateid *free_stateid) { - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); - COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); - - DECODE_TAIL; + return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } static __be32 From 044959715f370b24870c95df3940add8710c5a29 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 15:03:50 -0500 Subject: [PATCH 092/131] NFSD: Replace READ* macros in nfsd4_decode_getdeviceinfo() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 48 +++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 205e07d2b9dd..b244d16eeade 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -638,6 +638,21 @@ nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, return nfsd4_decode_opaque(argp, owner); } +#ifdef CONFIG_NFSD_PNFS +static __be32 +nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, + struct nfsd4_deviceid *devid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); + if (!p) + return nfserr_bad_xdr; + memcpy(devid, p, sizeof(*devid)); + return nfs_ok; +} +#endif /* CONFIG_NFSD_PNFS */ + static __be32 nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp, struct nfs4_sessionid *sessionid) @@ -1765,27 +1780,20 @@ static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, struct nfsd4_getdeviceinfo *gdev) { - DECODE_HEAD; - u32 num, i; + __be32 status; - READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4); - COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid)); - gdev->gd_layout_type = be32_to_cpup(p++); - gdev->gd_maxcount = be32_to_cpup(p++); - num = be32_to_cpup(p++); - if (num) { - if (num > 1000) - goto xdr_error; - READ_BUF(4 * num); - gdev->gd_notify_types = be32_to_cpup(p++); - for (i = 1; i < num; i++) { - if (be32_to_cpup(p++)) { - status = nfserr_inval; - goto out; - } - } - } - DECODE_TAIL; + status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_uint32_array(argp->xdr, + &gdev->gd_notify_types, 1) < 0) + return nfserr_bad_xdr; + + return nfs_ok; } static __be32 From 5185980d8a23001c2317c290129ab7ab20067e20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:40:07 -0500 Subject: [PATCH 093/131] NFSD: Replace READ* macros in nfsd4_decode_layoutcommit() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 120 ++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 62 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b244d16eeade..0b8f7dedfdfb 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -274,20 +274,6 @@ nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) return nfs_ok; } -static __be32 -nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) -{ - DECODE_HEAD; - - READ_BUF(12); - p = xdr_decode_hyper(p, &tv->tv_sec); - tv->tv_nsec = be32_to_cpup(p++); - if (tv->tv_nsec >= (u32)1000000000) - return nfserr_inval; - - DECODE_TAIL; -} - static __be32 nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { @@ -651,6 +637,29 @@ nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, memcpy(devid, p, sizeof(*devid)); return nfs_ok; } + +static __be32 +nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutcommit *lcp) +{ + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0) + return nfserr_bad_xdr; + if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES) + return nfserr_bad_xdr; + if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX) + return nfserr_bad_xdr; + + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0) + return nfserr_bad_xdr; + if (lcp->lc_up_len > 0) { + lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len); + if (!lcp->lc_up_layout) + return nfserr_bad_xdr; + } + + return nfs_ok; +} + #endif /* CONFIG_NFSD_PNFS */ static __be32 @@ -1796,6 +1805,41 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutcommit *lcp) +{ + __be32 *p, status; + + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lcp->lc_sid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0) + return nfserr_bad_xdr; + if (lcp->lc_newoffset) { + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0) + return nfserr_bad_xdr; + } else + lcp->lc_last_wr = 0; + p = xdr_inline_decode(argp->xdr, XDR_UNIT); + if (!p) + return nfserr_bad_xdr; + if (xdr_item_is_present(p)) { + status = nfsd4_decode_nfstime4(argp, &lcp->lc_mtime); + if (status) + return status; + } else { + lcp->lc_mtime.tv_nsec = UTIME_NOW; + } + return nfsd4_decode_layoutupdate4(argp, lcp); +} + static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, struct nfsd4_layoutget *lgp) @@ -1820,54 +1864,6 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, DECODE_TAIL; } -static __be32 -nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) -{ - DECODE_HEAD; - u32 timechange; - - READ_BUF(20); - p = xdr_decode_hyper(p, &lcp->lc_seg.offset); - p = xdr_decode_hyper(p, &lcp->lc_seg.length); - lcp->lc_reclaim = be32_to_cpup(p++); - - status = nfsd4_decode_stateid(argp, &lcp->lc_sid); - if (status) - return status; - - READ_BUF(4); - lcp->lc_newoffset = be32_to_cpup(p++); - if (lcp->lc_newoffset) { - READ_BUF(8); - p = xdr_decode_hyper(p, &lcp->lc_last_wr); - } else - lcp->lc_last_wr = 0; - READ_BUF(4); - timechange = be32_to_cpup(p++); - if (timechange) { - status = nfsd4_decode_time(argp, &lcp->lc_mtime); - if (status) - return status; - } else { - lcp->lc_mtime.tv_nsec = UTIME_NOW; - } - READ_BUF(8); - lcp->lc_layout_type = be32_to_cpup(p++); - - /* - * Save the layout update in XDR format and let the layout driver deal - * with it later. - */ - lcp->lc_up_len = be32_to_cpup(p++); - if (lcp->lc_up_len > 0) { - READ_BUF(lcp->lc_up_len); - READMEM(lcp->lc_up_layout, lcp->lc_up_len); - } - - DECODE_TAIL; -} - static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, struct nfsd4_layoutreturn *lrp) From c8e88e3aa73889421461f878cd569ef84f231ceb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 15:06:04 -0500 Subject: [PATCH 094/131] NFSD: Replace READ* macros in nfsd4_decode_layoutget() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0b8f7dedfdfb..d397dcedabb4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1844,24 +1844,27 @@ static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, struct nfsd4_layoutget *lgp) { - DECODE_HEAD; + __be32 status; - READ_BUF(36); - lgp->lg_signal = be32_to_cpup(p++); - lgp->lg_layout_type = be32_to_cpup(p++); - lgp->lg_seg.iomode = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &lgp->lg_seg.offset); - p = xdr_decode_hyper(p, &lgp->lg_seg.length); - p = xdr_decode_hyper(p, &lgp->lg_minlength); - - status = nfsd4_decode_stateid(argp, &lgp->lg_sid); + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lgp->lg_sid); if (status) return status; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0) + return nfserr_bad_xdr; - READ_BUF(4); - lgp->lg_maxcount = be32_to_cpup(p++); - - DECODE_TAIL; + return nfs_ok; } static __be32 From 645fcad371420913c30e9aca80fc0a38f3acf432 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:42:25 -0500 Subject: [PATCH 095/131] NFSD: Replace READ* macros in nfsd4_decode_layoutreturn() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 72 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d397dcedabb4..4a4cf3ea16f9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -660,6 +660,43 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutreturn *lrp) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0) + return nfserr_bad_xdr; + switch (lrp->lr_return_type) { + case RETURN_FILE: + if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lrp->lr_sid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0) + return nfserr_bad_xdr; + if (lrp->lrf_body_len > 0) { + lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len); + if (!lrp->lrf_body) + return nfserr_bad_xdr; + } + break; + case RETURN_FSID: + case RETURN_ALL: + lrp->lr_seg.offset = 0; + lrp->lr_seg.length = NFS4_MAX_UINT64; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + #endif /* CONFIG_NFSD_PNFS */ static __be32 @@ -1871,34 +1908,13 @@ static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, struct nfsd4_layoutreturn *lrp) { - DECODE_HEAD; - - READ_BUF(16); - lrp->lr_reclaim = be32_to_cpup(p++); - lrp->lr_layout_type = be32_to_cpup(p++); - lrp->lr_seg.iomode = be32_to_cpup(p++); - lrp->lr_return_type = be32_to_cpup(p++); - if (lrp->lr_return_type == RETURN_FILE) { - READ_BUF(16); - p = xdr_decode_hyper(p, &lrp->lr_seg.offset); - p = xdr_decode_hyper(p, &lrp->lr_seg.length); - - status = nfsd4_decode_stateid(argp, &lrp->lr_sid); - if (status) - return status; - - READ_BUF(4); - lrp->lrf_body_len = be32_to_cpup(p++); - if (lrp->lrf_body_len > 0) { - READ_BUF(lrp->lrf_body_len); - READMEM(lrp->lrf_body, lrp->lrf_body_len); - } - } else { - lrp->lr_seg.offset = 0; - lrp->lr_seg.length = NFS4_MAX_UINT64; - } - - DECODE_TAIL; + if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_layoutreturn4(argp, lrp); } #endif /* CONFIG_NFSD_PNFS */ From 53d70873e37c09a582167ed73d1858e3a2af0157 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:33:12 -0500 Subject: [PATCH 096/131] NFSD: Replace READ* macros in nfsd4_decode_secinfo_no_name() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4a4cf3ea16f9..2a738af6106a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1356,17 +1356,6 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } -static __be32 -nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) -{ - DECODE_HEAD; - - READ_BUF(4); - sin->sin_style = be32_to_cpup(p++); - DECODE_TAIL; -} - static __be32 nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) { @@ -1918,6 +1907,14 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, } #endif /* CONFIG_NFSD_PNFS */ +static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) +{ + if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) + return nfserr_bad_xdr; + return nfs_ok; +} + static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) From cf907b11326d9360877d6c6ea8f75e1b29f39f2f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:55:19 -0500 Subject: [PATCH 097/131] NFSD: Replace READ* macros in nfsd4_decode_sequence() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2a738af6106a..eae416eef7cd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1738,22 +1738,6 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } -static __be32 -nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) -{ - DECODE_HEAD; - - READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); - COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - seq->seqid = be32_to_cpup(p++); - seq->slotid = be32_to_cpup(p++); - seq->maxslots = be32_to_cpup(p++); - seq->cachethis = be32_to_cpup(p++); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) { @@ -1915,6 +1899,26 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) +{ + __be32 *p, status; + + status = nfsd4_decode_sessionid4(argp, &seq->sessionid); + if (status) + return status; + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4); + if (!p) + return nfserr_bad_xdr; + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p); + + return nfs_ok; +} + static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) From b7a0c8f6e741bf9dee0d24e69d3ce51fa4ccce78 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 14:57:44 -0500 Subject: [PATCH 098/131] NFSD: Replace READ* macros in nfsd4_decode_test_stateid() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 61 +++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index eae416eef7cd..3c42e04b9b3d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1738,42 +1738,6 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } -static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) -{ - int i; - __be32 *p, status; - struct nfsd4_test_stateid_id *stateid; - - READ_BUF(4); - test_stateid->ts_num_ids = ntohl(*p++); - - INIT_LIST_HEAD(&test_stateid->ts_stateid_list); - - for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); - if (!stateid) { - status = nfserrno(-ENOMEM); - goto out; - } - - INIT_LIST_HEAD(&stateid->ts_id_list); - list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); - - status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); - if (status) - goto out; - } - - status = 0; -out: - return status; -xdr_error: - dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); - status = nfserr_bad_xdr; - goto out; -} - static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) { DECODE_HEAD; @@ -1919,6 +1883,31 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, return nfs_ok; } +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_test_stateid_id *stateid; + __be32 status; + u32 i; + + if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0) + return nfserr_bad_xdr; + + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); + for (i = 0; i < test_stateid->ts_num_ids; i++) { + stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); + if (!stateid) + return nfserrno(-ENOMEM); /* XXX: not jukebox? */ + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); + if (status) + return status; + } + + return nfs_ok; +} + static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) From c95f2ec3490586cbb33badc8f4c82d6aa4955078 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 15:15:09 -0500 Subject: [PATCH 099/131] NFSD: Replace READ* macros in nfsd4_decode_destroy_clientid() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3c42e04b9b3d..c4da89a5d24f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1738,16 +1738,6 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } -static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) -{ - DECODE_HEAD; - - READ_BUF(8); - COPYMEM(&dc->clientid, 8); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) { DECODE_HEAD; @@ -1908,6 +1898,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta return nfs_ok; } +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_clientid *dc) +{ + return nfsd4_decode_clientid4(argp, &dc->clientid); +} + static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) From 0d6467844d437e07db1e76d96176b1a55401018c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 15:02:11 -0500 Subject: [PATCH 100/131] NFSD: Replace READ* macros in nfsd4_decode_reclaim_complete() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c4da89a5d24f..59f89af5b924 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1738,16 +1738,6 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } -static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) -{ - DECODE_HEAD; - - READ_BUF(4); - rc->rca_one_fs = be32_to_cpup(p++); - - DECODE_TAIL; -} - #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, @@ -1904,6 +1894,14 @@ static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, return nfsd4_decode_clientid4(argp, &dc->clientid); } +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, + struct nfsd4_reclaim_complete *rc) +{ + if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) + return nfserr_bad_xdr; + return nfs_ok; +} + static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) From 6aef27aaeae7611f98af08205acc79f5a8f3aa59 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:44:05 -0500 Subject: [PATCH 101/131] NFSD: Replace READ* macros in nfsd4_decode_fallocate() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 59f89af5b924..b9f448dc8247 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1906,17 +1906,17 @@ static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, struct nfsd4_fallocate *fallocate) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid); + status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); if (status) return status; + if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0) + return nfserr_bad_xdr; - READ_BUF(16); - p = xdr_decode_hyper(p, &fallocate->falloc_offset); - xdr_decode_hyper(p, &fallocate->falloc_length); - - DECODE_TAIL; + return nfs_ok; } static __be32 From f49e4b4d58cc835d8bd0cc9663f7b9c5497e0e7e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 18:05:06 -0500 Subject: [PATCH 102/131] NFSD: Replace READ* macros in nfsd4_decode_nl4_server() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b9f448dc8247..e9382bfb4a97 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1941,36 +1941,42 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, struct nl4_server *ns) { - DECODE_HEAD; struct nfs42_netaddr *naddr; + __be32 *p; - READ_BUF(4); - ns->nl4_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0) + return nfserr_bad_xdr; /* currently support for 1 inter-server source server */ switch (ns->nl4_type) { case NL4_NETADDR: naddr = &ns->u.nl4_addr; - READ_BUF(4); - naddr->netid_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0) + return nfserr_bad_xdr; if (naddr->netid_len > RPCBIND_MAXNETIDLEN) - goto xdr_error; + return nfserr_bad_xdr; - READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ - COPYMEM(naddr->netid, naddr->netid_len); + p = xdr_inline_decode(argp->xdr, naddr->netid_len); + if (!p) + return nfserr_bad_xdr; + memcpy(naddr->netid, p, naddr->netid_len); - naddr->addr_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0) + return nfserr_bad_xdr; if (naddr->addr_len > RPCBIND_MAXUADDRLEN) - goto xdr_error; + return nfserr_bad_xdr; - READ_BUF(naddr->addr_len); - COPYMEM(naddr->addr, naddr->addr_len); + p = xdr_inline_decode(argp->xdr, naddr->addr_len); + if (!p) + return nfserr_bad_xdr; + memcpy(naddr->addr, p, naddr->addr_len); break; default: - goto xdr_error; + return nfserr_bad_xdr; } - DECODE_TAIL; + + return nfs_ok; } static __be32 From e8febea7190bcbd1e608093acb67f2a5009556aa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:49:37 -0500 Subject: [PATCH 103/131] NFSD: Replace READ* macros in nfsd4_decode_copy() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 41 ++++++++++++++++++++++------------------- fs/nfsd/xdr4.h | 2 +- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e9382bfb4a97..42b5e8392f47 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1982,40 +1982,44 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { - DECODE_HEAD; struct nl4_server *ns_dummy; - int i, count; + u32 consecutive, i, count; + __be32 status; - status = nfsd4_decode_stateid(argp, ©->cp_src_stateid); + status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); if (status) return status; - status = nfsd4_decode_stateid(argp, ©->cp_dst_stateid); + status = nfsd4_decode_stateid4(argp, ©->cp_dst_stateid); if (status) return status; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_src_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_dst_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_count) < 0) + return nfserr_bad_xdr; + /* ca_consecutive: we always do consecutive copies */ + if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, ©->cp_synchronous) < 0) + return nfserr_bad_xdr; - READ_BUF(8 + 8 + 8 + 4 + 4 + 4); - p = xdr_decode_hyper(p, ©->cp_src_pos); - p = xdr_decode_hyper(p, ©->cp_dst_pos); - p = xdr_decode_hyper(p, ©->cp_count); - p++; /* ca_consecutive: we always do consecutive copies */ - copy->cp_synchronous = be32_to_cpup(p++); - - count = be32_to_cpup(p++); - + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; copy->cp_intra = false; if (count == 0) { /* intra-server copy */ copy->cp_intra = true; - goto intra; + return nfs_ok; } - /* decode all the supplied server addresses but use first */ + /* decode all the supplied server addresses but use only the first */ status = nfsd4_decode_nl4_server(argp, ©->cp_src); if (status) return status; ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserrno(-ENOMEM); + return nfserrno(-ENOMEM); /* XXX: jukebox? */ for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { @@ -2024,9 +2028,8 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) } } kfree(ns_dummy); -intra: - DECODE_TAIL; + return nfs_ok; } static __be32 @@ -4781,7 +4784,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, - copy->cp_synchronous); + !!copy->cp_synchronous); if (nfserr) return nfserr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 232529bc1b79..facc5762bf83 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -554,7 +554,7 @@ struct nfsd4_copy { bool cp_intra; /* both */ - bool cp_synchronous; + u32 cp_synchronous; /* response */ struct nfsd42_write_res cp_res; From f9a953fb369bbd2135ccead3393ec1ef66544471 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Nov 2020 14:19:24 -0500 Subject: [PATCH 104/131] NFSD: Replace READ* macros in nfsd4_decode_copy_notify() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 42b5e8392f47..97694df51388 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2032,25 +2032,25 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) return nfs_ok; } -static __be32 -nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) -{ - return nfsd4_decode_stateid(argp, &os->stateid); -} - static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, struct nfsd4_copy_notify *cn) { __be32 status; - status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); + status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); if (status) return status; return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); } +static __be32 +nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, + struct nfsd4_offload_status *os) +{ + return nfsd4_decode_stateid(argp, &os->stateid); +} + static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { From 2846bb0525a73e00b3566fda535ea6a5879e2971 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Nov 2020 14:21:25 -0500 Subject: [PATCH 105/131] NFSD: Replace READ* macros in nfsd4_decode_offload_status() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 97694df51388..74ca44831ec4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2048,7 +2048,7 @@ static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, struct nfsd4_offload_status *os) { - return nfsd4_decode_stateid(argp, &os->stateid); + return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 From 9d32b412fe0a6186cc57789d218e8f8299454ae2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:54:47 -0500 Subject: [PATCH 106/131] NFSD: Replace READ* macros in nfsd4_decode_seek() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 74ca44831ec4..a5a3dc3fea0a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2054,17 +2054,17 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &seek->seek_stateid); + status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); if (status) return status; + if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0) + return nfserr_bad_xdr; - READ_BUF(8 + 4); - p = xdr_decode_hyper(p, &seek->seek_offset); - seek->seek_whence = be32_to_cpup(p); - - DECODE_TAIL; + return nfs_ok; } /* From 3dfd0b0e15671e2b4047ccb9222432f0b2d930be Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:46:46 -0500 Subject: [PATCH 107/131] NFSD: Replace READ* macros in nfsd4_decode_clone() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 52 +++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a5a3dc3fea0a..baf17d29097a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -575,18 +575,6 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, return nfs_ok; } -static __be32 -nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) -{ - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - sid->si_generation = be32_to_cpup(p++); - COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); - - DECODE_TAIL; -} - static __be32 nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid) { @@ -1919,25 +1907,6 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, return nfs_ok; } -static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) -{ - DECODE_HEAD; - - status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid); - if (status) - return status; - status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid); - if (status) - return status; - - READ_BUF(8 + 8 + 8); - p = xdr_decode_hyper(p, &clone->cl_src_pos); - p = xdr_decode_hyper(p, &clone->cl_dst_pos); - p = xdr_decode_hyper(p, &clone->cl_count); - DECODE_TAIL; -} - static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, struct nl4_server *ns) { @@ -2067,6 +2036,27 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) return nfs_ok; } +static __be32 +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +{ + __be32 status; + + status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); + if (status) + return status; + status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid); + if (status) + return status; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} + /* * XDR data that is more than PAGE_SIZE in size is normally part of a * read or write. However, the size of extended attributes is limited From 830c71502ae0ae1677ac6c08ffbcf85a6e7b2937 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:56:52 -0500 Subject: [PATCH 108/131] NFSD: Replace READ* macros in nfsd4_decode_xattr_name() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index baf17d29097a..414960948d65 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2117,25 +2117,22 @@ nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, static __be32 nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) { - DECODE_HEAD; char *name, *sp, *dp; u32 namelen, cnt; + __be32 *p; - READ_BUF(4); - namelen = be32_to_cpup(p++); - + if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0) + return nfserr_bad_xdr; if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) return nfserr_nametoolong; - if (namelen == 0) - goto xdr_error; - - READ_BUF(namelen); - + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, namelen); + if (!p) + return nfserr_bad_xdr; name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1); if (!name) return nfserr_jukebox; - memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); /* @@ -2148,14 +2145,14 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) while (cnt-- > 0) { if (*sp == '\0') - goto xdr_error; + return nfserr_bad_xdr; *dp++ = *sp++; } *dp = '\0'; *namep = name; - DECODE_TAIL; + return nfs_ok; } /* From 403366a7e8e2930002157525cd44add7fa01bca9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 10:59:57 -0500 Subject: [PATCH 109/131] NFSD: Replace READ* macros in nfsd4_decode_setxattr() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 414960948d65..02e600999da1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2184,11 +2184,11 @@ static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, struct nfsd4_setxattr *setxattr) { - DECODE_HEAD; u32 flags, maxcount, size; + __be32 status; - READ_BUF(4); - flags = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &flags) < 0) + return nfserr_bad_xdr; if (flags > SETXATTR4_REPLACE) return nfserr_inval; @@ -2201,8 +2201,8 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, maxcount = svc_max_payload(argp->rqstp); maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); - READ_BUF(4); - size = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &size) < 0) + return nfserr_bad_xdr; if (size > maxcount) return nfserr_xattr2big; @@ -2211,12 +2211,12 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, struct xdr_buf payload; if (!xdr_stream_subsegment(argp->xdr, &payload, size)) - goto xdr_error; + return nfserr_bad_xdr; status = nfsd4_vbuf_from_vector(argp, &payload, &setxattr->setxa_buf, size); } - DECODE_TAIL; + return nfs_ok; } static __be32 From 2212036cadf4da3c4b0e4bd2a9a8c3d78617ab4f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 11:04:02 -0500 Subject: [PATCH 110/131] NFSD: Replace READ* macros in nfsd4_decode_listxattrs() Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 02e600999da1..777b11810e95 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2223,11 +2223,10 @@ static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, struct nfsd4_listxattrs *listxattrs) { - DECODE_HEAD; u32 maxcount; - READ_BUF(12); - p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie); + if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0) + return nfserr_bad_xdr; /* * If the cookie is too large to have even one user.x attribute @@ -2237,7 +2236,8 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2))) return nfserr_badcookie; - maxcount = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0) + return nfserr_bad_xdr; if (maxcount < 8) /* Always need at least 2 words (length and one character) */ return nfserr_inval; @@ -2245,7 +2245,7 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, maxcount = min(maxcount, svc_max_payload(argp->rqstp)); listxattrs->lsxa_maxcount = maxcount; - DECODE_TAIL; + return nfs_ok; } static __be32 From 3a237b4af5b7b0e77588e120554077cab3341943 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Nov 2020 12:49:52 -0500 Subject: [PATCH 111/131] NFSD: Make nfsd4_ops::opnum a u32 Avoid passing a "pointer to int" argument to xdr_stream_decode_u32. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 7 +++---- fs/nfsd/xdr4.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 56d074a6cb31..73e717609213 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -3282,7 +3282,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) void warn_on_nonidempotent_op(struct nfsd4_op *op) { if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { - pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + pr_err("unable to encode reply to nonidempotent op %u (%s)\n", op->opnum, nfsd4_op_name(op->opnum)); WARN_ON_ONCE(1); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 777b11810e95..a3cfc1d071a6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2419,9 +2419,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op = &argp->ops[i]; op->replay = NULL; - READ_BUF(4); - op->opnum = be32_to_cpup(p++); - + if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) + return nfserr_bad_xdr; if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) @@ -5378,7 +5377,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->status && opdesc && !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) goto status; - BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index facc5762bf83..2c31f3a7d7c7 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -615,7 +615,7 @@ struct nfsd4_copy_notify { }; struct nfsd4_op { - int opnum; + u32 opnum; const struct nfsd4_operation * opdesc; __be32 status; union nfsd4_op_u { From d9b74bdac6f24afc3101b6a5b6f59842610c9c94 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 11:07:06 -0500 Subject: [PATCH 112/131] NFSD: Replace READ* macros in nfsd4_decode_compound() And clean-up: Now that we have removed the DECODE_TAIL macro from nfsd4_decode_compound(), we observe that there's no benefit for nfsd4_decode_compound() to return nfs_ok or nfserr_bad_xdr only to have its sole caller convert those values to one or zero, respectively. Have nfsd4_decode_compound() return 1/0 instead. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 67 ++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a3cfc1d071a6..f8b5750aae4e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -186,28 +186,6 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } -/** - * savemem - duplicate a chunk of memory for later processing - * @argp: NFSv4 compound argument structure to be freed with - * @p: pointer to be duplicated - * @nbytes: length to be duplicated - * - * Returns a pointer to a copy of @nbytes bytes of memory at @p - * that are preserved until processing of the NFSv4 compound - * operation described by @argp finishes. - */ -static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) -{ - void *ret; - - ret = svcxdr_tmpalloc(argp, nbytes); - if (!ret) - return NULL; - memcpy(ret, p, nbytes); - return ret; -} - - /* * NFSv4 basic data type decoders */ @@ -2372,43 +2350,54 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static __be32 +static int nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { - DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; int auth_slack= argp->rqstp->rq_auth_slack; int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; + __be32 *p; int i; - READ_BUF(4); - argp->taglen = be32_to_cpup(p++); - READ_BUF(argp->taglen); - SAVEMEM(argp->tag, argp->taglen); - READ_BUF(8); - argp->minorversion = be32_to_cpup(p++); - argp->opcnt = be32_to_cpup(p++); - max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); + if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) + return 0; + max_reply += XDR_UNIT; + argp->tag = NULL; + if (unlikely(argp->taglen)) { + if (argp->taglen > NFSD4_MAX_TAGLEN) + return 0; + p = xdr_inline_decode(argp->xdr, argp->taglen); + if (!p) + return 0; + argp->tag = svcxdr_tmpalloc(argp, argp->taglen); + if (!argp->tag) + return 0; + memcpy(argp->tag, p, argp->taglen); + max_reply += xdr_align_size(argp->taglen); + } + + if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) + return 0; + if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) + return 0; - if (argp->taglen > NFSD4_MAX_TAGLEN) - goto xdr_error; /* * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS * here, so we return success at the xdr level so that * nfsd4_proc can handle this is an NFS-level error. */ if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - return 0; + return 1; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); - goto xdr_error; + return 0; } } @@ -2420,7 +2409,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->replay = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) - return nfserr_bad_xdr; + return 0; if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) @@ -2467,7 +2456,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - DECODE_TAIL; + return 1; } static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, @@ -5479,7 +5468,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) args->ops = args->iops; args->rqstp = rqstp; - return !nfsd4_decode_compound(args); + return nfsd4_decode_compound(args); } int From 5cfc822f3e77b0477e6602d399116130317f537a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Nov 2020 11:12:18 -0500 Subject: [PATCH 113/131] NFSD: Remove macros that are no longer used Now that all the NFSv4 decoder functions have been converted to make direct calls to the xdr helpers, remove the unused C macros. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 40 ---------------------------------------- fs/nfsd/xdr4.h | 9 --------- 2 files changed, 49 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f8b5750aae4e..7142b0501451 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -102,45 +102,6 @@ check_filename(char *str, int len) return 0; } -#define DECODE_HEAD \ - __be32 *p; \ - __be32 status -#define DECODE_TAIL \ - status = 0; \ -out: \ - return status; \ -xdr_error: \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - status = nfserr_bad_xdr; \ - goto out - -#define READMEM(x,nbytes) do { \ - x = (char *)p; \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define SAVEMEM(x,nbytes) do { \ - if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ - savemem(argp, p, nbytes) : \ - (char *)p)) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - goto xdr_error; \ - } \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define COPYMEM(x,nbytes) do { \ - memcpy((x), p, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define READ_BUF(nbytes) \ - do { \ - p = xdr_inline_decode(argp->xdr,\ - nbytes); \ - if (!p) \ - goto xdr_error; \ - } while (0) - static int zero_clientid(clientid_t *clid) { return (clid->cl_boot == 0) && (clid->cl_id == 0); @@ -5461,7 +5422,6 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) struct nfsd4_compoundargs *args = rqstp->rq_argp; /* svcxdr_tmp_alloc */ - args->tmpp = NULL; args->to_free = NULL; args->xdr = &rqstp->rq_arg_stream; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2c31f3a7d7c7..e12fbe382e3f 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -386,13 +386,6 @@ struct nfsd4_setclientid_confirm { nfs4_verifier sc_confirm; }; -struct nfsd4_saved_compoundargs { - __be32 *p; - __be32 *end; - int pagelen; - struct page **pagelist; -}; - struct nfsd4_test_stateid_id { __be32 ts_id_status; stateid_t ts_id_stateid; @@ -696,8 +689,6 @@ struct svcxdr_tmpbuf { struct nfsd4_compoundargs { /* scratch variables for XDR decode */ - __be32 tmp[8]; - __be32 * tmpp; struct xdr_stream *xdr; struct svcxdr_tmpbuf *to_free; struct svc_rqst *rqstp; From 4420440c57892779f265108f46f83832a88ca795 Mon Sep 17 00:00:00 2001 From: kazuo ito Date: Fri, 27 Nov 2020 15:26:59 +0900 Subject: [PATCH 114/131] nfsd: Fix message level for normal termination The warning message from nfsd terminating normally can confuse system adminstrators or monitoring software. Though it's not exactly fair to pin-point a commit where it originated, the current form in the current place started to appear in: Fixes: e096bbc6488d ("knfsd: remove special handling for SIGHUP") Signed-off-by: kazuo ito Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1575e567bbfa..00384c332f9b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -529,8 +529,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) return; nfsd_shutdown_net(net); - printk(KERN_WARNING "nfsd: last server has exited, flushing export " - "cache\n"); + pr_info("nfsd: last server has exited, flushing export cache\n"); nfsd_export_flush(net); } From 4b5cff7ed8afcdd35bfff39ab503342900ec80c6 Mon Sep 17 00:00:00 2001 From: Roberto Bergantinos Corpas Date: Fri, 27 Nov 2020 19:38:31 +0100 Subject: [PATCH 115/131] sunrpc: clean-up cache downcall We can simplify code around cache_downcall unifying memory allocations using kvmalloc. This has the benefit of getting rid of cache_slow_downcall (and queue_io_mutex), and also matches userland allocation size and limits. Signed-off-by: Roberto Bergantinos Corpas Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20c93b68505e..1a2c1c44bb00 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -778,7 +778,6 @@ void cache_clean_deferred(void *owner) */ static DEFINE_SPINLOCK(queue_lock); -static DEFINE_MUTEX(queue_io_mutex); struct cache_queue { struct list_head list; @@ -906,44 +905,26 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, return ret; } -static ssize_t cache_slow_downcall(const char __user *buf, - size_t count, struct cache_detail *cd) -{ - static char write_buf[32768]; /* protected by queue_io_mutex */ - ssize_t ret = -EINVAL; - - if (count >= sizeof(write_buf)) - goto out; - mutex_lock(&queue_io_mutex); - ret = cache_do_downcall(write_buf, buf, count, cd); - mutex_unlock(&queue_io_mutex); -out: - return ret; -} - static ssize_t cache_downcall(struct address_space *mapping, const char __user *buf, size_t count, struct cache_detail *cd) { - struct page *page; - char *kaddr; + char *write_buf; ssize_t ret = -ENOMEM; - if (count >= PAGE_SIZE) - goto out_slow; + if (count >= 32768) { /* 32k is max userland buffer, lets check anyway */ + ret = -EINVAL; + goto out; + } - page = find_or_create_page(mapping, 0, GFP_KERNEL); - if (!page) - goto out_slow; + write_buf = kvmalloc(count + 1, GFP_KERNEL); + if (!write_buf) + goto out; - kaddr = kmap(page); - ret = cache_do_downcall(kaddr, buf, count, cd); - kunmap(page); - unlock_page(page); - put_page(page); + ret = cache_do_downcall(write_buf, buf, count, cd); + kvfree(write_buf); +out: return ret; -out_slow: - return cache_slow_downcall(buf, count, cd); } static ssize_t cache_write(struct file *filp, const char __user *buf, From 5e54dafbe0b4a2a8555ecf0b9690150da37ee0e1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Nov 2020 15:48:01 -0500 Subject: [PATCH 116/131] SUNRPC: Remove XDRBUF_SPARSE_PAGES flag in gss_proxy upcall There's no need to defer allocation of pages for the receive buffer. - This upcall is quite infrequent - gssp_alloc_receive_pages() can allocate the pages with GFP_KERNEL, unlike the transport - gssp_alloc_receive_pages() knows exactly how many pages are needed Signed-off-by: Chuck Lever Reviewed-by: Olga Kornievskaia --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 15 ++++++++++----- net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 - 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index af9c7f43859c..d1c003a25b0f 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -200,7 +200,7 @@ static int gssp_call(struct net *net, struct rpc_message *msg) static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) { - int i; + unsigned int i; for (i = 0; i < arg->npages && arg->pages[i]; i++) __free_page(arg->pages[i]); @@ -210,14 +210,19 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) { + unsigned int i; + arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE); arg->pages = kcalloc(arg->npages, sizeof(struct page *), GFP_KERNEL); - /* - * XXX: actual pages are allocated by xdr layer in - * xdr_partial_copy_from_skb. - */ if (!arg->pages) return -ENOMEM; + for (i = 0; i < arg->npages; i++) { + arg->pages[i] = alloc_page(GFP_KERNEL); + if (!arg->pages[i]) { + gssp_free_receive_pages(arg); + return -ENOMEM; + } + } return 0; } diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index c636c648849b..d79f12c2550a 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -771,7 +771,6 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req, xdr_inline_pages(&req->rq_rcv_buf, PAGE_SIZE/2 /* pretty arbitrary */, arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; done: if (err) dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); From eb162e1772f85231dabc789fb4bfea63d2d9df79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Nov 2020 16:17:40 -0500 Subject: [PATCH 117/131] NFSD: Fix sparse warning in nfs4proc.c linux/fs/nfsd/nfs4proc.c:1542:24: warning: incorrect type in assignment (different base types) linux/fs/nfsd/nfs4proc.c:1542:24: expected restricted __be32 [assigned] [usertype] status linux/fs/nfsd/nfs4proc.c:1542:24: got int Clean-up: The dup_copy_fields() function returns only zero, so make it return void for now, and get rid of the return code check. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 73e717609213..4727b7f03c5b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1425,7 +1425,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) return status; } -static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; @@ -1444,8 +1444,6 @@ static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); dst->ss_mnt = src->ss_mnt; - - return 0; } static void cleanup_async_copy(struct nfsd4_copy *copy) @@ -1539,9 +1537,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, refcount_set(&async_copy->refcount, 1); memcpy(©->cp_res.cb_stateid, ©->cp_stateid, sizeof(copy->cp_stateid)); - status = dup_copy_fields(copy, async_copy); - if (status) - goto out_err; + dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) From ca9364dde50daba93eff711b4b945fd08beafcc2 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 30 Nov 2020 16:24:49 -0500 Subject: [PATCH 118/131] NFSD: Fix 5 seconds delay when doing inter server copy Since commit b4868b44c5628 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE"), every inter server copy operation suffers 5 seconds delay regardless of the size of the copy. The delay is from nfs_set_open_stateid_locked when the check by nfs_stateid_is_sequential fails because the seqid in both nfs4_state and nfs4_stateid are 0. Fix by modifying nfs4_init_cp_state to return the stateid with seqid 1 instead of 0. This is also to conform with section 4.8 of RFC 7862. Here is the relevant paragraph from section 4.8 of RFC 7862: A copy offload stateid's seqid MUST NOT be zero. In the context of a copy offload operation, it is inappropriate to indicate "the most recent copy offload operation" using a stateid with a seqid of zero (see Section 8.2.2 of [RFC5661]). It is inappropriate because the stateid refers to internal state in the server and there may be several asynchronous COPY operations being performed in parallel on the same file by the server. Therefore, a copy offload stateid with a seqid of zero MUST be considered invalid. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index be6dcc4c2ab0..1d2cd6a88f61 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -769,6 +769,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); stid->stid.si_opaque.so_id = new_id; + stid->stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) From 4a9d81caf841cd2c0ae36abec9c2963bf21d0284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cheng=C2=A0Lin?= Date: Tue, 1 Dec 2020 07:06:35 -0500 Subject: [PATCH 119/131] nfs_common: need lock during iterate through the list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the elem is deleted during be iterated on it, the iteration process will fall into an endless loop. kernel: NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [nfsd:17137] PID: 17137  TASK: ffff8818d93c0000  CPU: 4   COMMAND: "nfsd"     [exception RIP: __state_in_grace+76]     RIP: ffffffffc00e817c  RSP: ffff8818d3aefc98  RFLAGS: 00000246     RAX: ffff881dc0c38298  RBX: ffffffff81b03580  RCX: ffff881dc02c9f50     RDX: ffff881e3fce8500  RSI: 0000000000000001  RDI: ffffffff81b03580     RBP: ffff8818d3aefca0   R8: 0000000000000020   R9: ffff8818d3aefd40     R10: ffff88017fc03800  R11: ffff8818e83933c0  R12: ffff8818d3aefd40     R13: 0000000000000000  R14: ffff8818e8391068  R15: ffff8818fa6e4000     CS: 0010  SS: 0018  #0 [ffff8818d3aefc98] opens_in_grace at ffffffffc00e81e3 [grace]  #1 [ffff8818d3aefca8] nfs4_preprocess_stateid_op at ffffffffc02a3e6c [nfsd]  #2 [ffff8818d3aefd18] nfsd4_write at ffffffffc028ed5b [nfsd]  #3 [ffff8818d3aefd80] nfsd4_proc_compound at ffffffffc0290a0d [nfsd]  #4 [ffff8818d3aefdd0] nfsd_dispatch at ffffffffc027b800 [nfsd]  #5 [ffff8818d3aefe08] svc_process_common at ffffffffc02017f3 [sunrpc]  #6 [ffff8818d3aefe70] svc_process at ffffffffc0201ce3 [sunrpc]  #7 [ffff8818d3aefe98] nfsd at ffffffffc027b117 [nfsd]  #8 [ffff8818d3aefec8] kthread at ffffffff810b88c1  #9 [ffff8818d3aeff50] ret_from_fork at ffffffff816d1607 The troublemake elem: crash> lock_manager ffff881dc0c38298 struct lock_manager {   list = {     next = 0xffff881dc0c38298,     prev = 0xffff881dc0c38298   },   block_opens = false } Fixes: c87fb4a378f9 ("lockd: NLM grace period shouldn't block NFSv4 opens") Signed-off-by: Cheng Lin  Signed-off-by: Yi Wang  Signed-off-by: Chuck Lever --- fs/nfs_common/grace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index b73d9dd37f73..26f2a50eceac 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -69,10 +69,14 @@ __state_in_grace(struct net *net, bool open) if (!open) return !list_empty(grace_list); + spin_lock(&grace_lock); list_for_each_entry(lm, grace_list, list) { - if (lm->block_opens) + if (lm->block_opens) { + spin_unlock(&grace_lock); return true; + } } + spin_unlock(&grace_lock); return false; } From 70b87f77294d16d3e567056ba4c9ee2b091a5b50 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:14 -0500 Subject: [PATCH 120/131] nfsd: only call inode_query_iversion in the I_VERSION case inode_query_iversion() can modify i_version. Depending on the exported filesystem, that may not be safe. For example, if you're re-exporting NFS, NFS stores the server's change attribute in i_version and does not expect it to be modified locally. This has been observed causing unnecessary cache invalidations. The way a filesystem indicates that it's OK to call inode_query_iverson() is by setting SB_I_VERSION. So, move the I_VERSION check out of encode_change(), where it's used only in GETATTR responses, to nfsd4_change_attribute(), which is also called for pre- and post- operation attributes. (Note we could also pull the NFSEXP_V4ROOT case into nfsd4_change_attribute() as well. That would actually be a no-op, since pre/post attrs are only used for metadata-modifying operations, and V4ROOT exports are read-only. But we might make the change in the future just for simplicity.) Reported-by: Daire Byrne Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 5 ++--- fs/nfsd/nfs4xdr.c | 6 +----- fs/nfsd/nfsfh.h | 14 ++++++++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index a6718b952975..4599de24ef1a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -291,14 +291,13 @@ void fill_post_wcc(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, - d_inode(fhp->fh_dentry)); if (err) { fhp->fh_post_saved = false; - /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else fhp->fh_post_saved = true; + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, + d_inode(fhp->fh_dentry)); } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7142b0501451..ef1f55366616 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2426,12 +2426,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, if (exp->ex_flags & NFSEXP_V4ROOT) { *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; - } else if (IS_I_VERSION(inode)) { + } else p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); - } else { - *p++ = cpu_to_be32(stat->ctime.tv_sec); - *p++ = cpu_to_be32(stat->ctime.tv_nsec); - } return p; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 56cfbc361561..39d764b129fa 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -261,10 +261,16 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, { u64 chattr; - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); + if (IS_I_VERSION(inode)) { + chattr = stat->ctime.tv_sec; + chattr <<= 30; + chattr += stat->ctime.tv_nsec; + chattr += inode_query_iversion(inode); + } else { + chattr = stat->ctime.tv_sec; + chattr <<= 32; + chattr += stat->ctime.tv_nsec; + } return chattr; } From b2140338d8dca827ad9e83f3e026e9d51748b265 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:15 -0500 Subject: [PATCH 121/131] nfsd: simplify nfsd4_change_info It doesn't make sense to carry all these extra fields around. Just make everything into change attribute from the start. This is just cleanup, there should be no change in behavior. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 ++--------- fs/nfsd/xdr4.h | 11 ----------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ef1f55366616..7f650fa47006 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2459,15 +2459,8 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { *p++ = cpu_to_be32(c->atomic); - if (c->change_supported) { - p = xdr_encode_hyper(p, c->before_change); - p = xdr_encode_hyper(p, c->after_change); - } else { - *p++ = cpu_to_be32(c->before_ctime_sec); - *p++ = cpu_to_be32(c->before_ctime_nsec); - *p++ = cpu_to_be32(c->after_ctime_sec); - *p++ = cpu_to_be32(c->after_ctime_nsec); - } + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); return p; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index e12fbe382e3f..b4556e86e97c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -76,12 +76,7 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) struct nfsd4_change_info { u32 atomic; - bool change_supported; - u32 before_ctime_sec; - u32 before_ctime_nsec; u64 before_change; - u32 after_ctime_sec; - u32 after_ctime_nsec; u64 after_change; }; @@ -754,15 +749,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); cinfo->atomic = (u32)fhp->fh_post_saved; - cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; cinfo->after_change = fhp->fh_post_change; - cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; - cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; - cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; - cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; - } From 4b03d99794eeed27650597a886247c6427ce1055 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:16 -0500 Subject: [PATCH 122/131] nfsd: minor nfsd4_change_attribute cleanup Minor cleanup, no change in behavior. Also pull out a common helper that'll be useful elsewhere. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.h | 13 +++++-------- include/linux/iversion.h | 13 +++++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 39d764b129fa..45bd776290d5 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -259,19 +259,16 @@ fh_clear_wcc(struct svc_fh *fhp) static inline u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) { - u64 chattr; - if (IS_I_VERSION(inode)) { + u64 chattr; + chattr = stat->ctime.tv_sec; chattr <<= 30; chattr += stat->ctime.tv_nsec; chattr += inode_query_iversion(inode); - } else { - chattr = stat->ctime.tv_sec; - chattr <<= 32; - chattr += stat->ctime.tv_nsec; - } - return chattr; + return chattr; + } else + return time_to_chattr(&stat->ctime); } extern void fill_pre_wcc(struct svc_fh *fhp); diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 2917ef990d43..3bfebde5a1a6 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -328,6 +328,19 @@ inode_query_iversion(struct inode *inode) return cur >> I_VERSION_QUERIED_SHIFT; } +/* + * For filesystems without any sort of change attribute, the best we can + * do is fake one up from the ctime: + */ +static inline u64 time_to_chattr(struct timespec64 *t) +{ + u64 chattr = t->tv_sec; + + chattr <<= 32; + chattr += t->tv_nsec; + return chattr; +} + /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check From 942b20dc245590327ee0187c15c78174cd96dd52 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:17 -0500 Subject: [PATCH 123/131] nfsd4: don't query change attribute in v2/v3 case inode_query_iversion() has side effects, and there's no point calling it when we're not even going to use it. We check whether we're currently processing a v4 request by checking fh_maxsize, which is arguably a little hacky; we could add a flag to svc_fh instead. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 4599de24ef1a..b0a53c857706 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -259,11 +259,11 @@ void fill_pre_wcc(struct svc_fh *fhp) { struct inode *inode; struct kstat stat; + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); __be32 err; if (fhp->fh_pre_saved) return; - inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); if (err) { @@ -272,11 +272,12 @@ void fill_pre_wcc(struct svc_fh *fhp) stat.ctime = inode->i_ctime; stat.size = inode->i_size; } + if (v4) + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_mtime = stat.mtime; fhp->fh_pre_ctime = stat.ctime; fhp->fh_pre_size = stat.size; - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_saved = true; } @@ -285,6 +286,8 @@ void fill_pre_wcc(struct svc_fh *fhp) */ void fill_post_wcc(struct svc_fh *fhp) { + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; if (fhp->fh_post_saved) @@ -293,11 +296,12 @@ void fill_post_wcc(struct svc_fh *fhp) err = fh_getattr(fhp, &fhp->fh_post_attr); if (err) { fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; + fhp->fh_post_attr.ctime = inode->i_ctime; } else fhp->fh_post_saved = true; - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, - d_inode(fhp->fh_dentry)); + if (v4) + fhp->fh_post_change = + nfsd4_change_attribute(&fhp->fh_post_attr, inode); } /* From 1631087ba8727db03c0ab2815dc06dc25d962b80 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:18 -0500 Subject: [PATCH 124/131] Revert "nfsd4: support change_attr_type attribute" This reverts commit a85857633b04d57f4524cca0a2bfaf87b2543f9f. We're still factoring ctime into our change attribute even in the IS_I_VERSION case. If someone sets the system time backwards, a client could see the change attribute go backwards. Maybe we can just say "well, don't do that", but there's some question whether that's good enough, or whether we need a better guarantee. Also, the client still isn't actually using the attribute. While we're still figuring this out, let's just stop returning this attribute. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 10 ---------- fs/nfsd/nfsd.h | 1 - include/linux/nfs4.h | 8 -------- 3 files changed, 19 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7f650fa47006..45ee6b12ce5b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3311,16 +3311,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } - if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - if (IS_I_VERSION(d_inode(dentry))) - *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR); - else - *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); - } - #ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7907de3f2ee6..d63cf8196fed 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -395,7 +395,6 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ - FATTR4_WORD2_CHANGE_ATTR_TYPE | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS | \ FATTR4_WORD2_XATTR_SUPPORT) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9dc7eeac924f..5b4c67c91f56 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -385,13 +385,6 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; -enum change_attr_type4 { - NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, - NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, - NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 -}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -459,7 +452,6 @@ enum change_attr_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) From daab110e47f8d7aa6da66923e3ac1a8dbd2b2a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:14 -0500 Subject: [PATCH 125/131] nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations With NFSv3 nfsd will always attempt to send along WCC data to the client. This generally involves saving off the in-core inode information prior to doing the operation on the given filehandle, and then issuing a vfs_getattr to it after the op. Some filesystems (particularly clustered or networked ones) have an expensive ->getattr inode operation. Atomicity is also often difficult or impossible to guarantee on such filesystems. For those, we're best off not trying to provide WCC information to the client at all, and to simply allow it to poll for that information as needed with a GETATTR RPC. This patch adds a new flags field to struct export_operations, and defines a new EXPORT_OP_NOWCC flag that filesystems can use to indicate that nfsd should not attempt to provide WCC info in NFSv3 replies. It also adds a blurb about the new flags field and flag to the exporting documentation. The server will also now skip collecting this information for NFSv2 as well, since that info is never used there anyway. Note that this patch does not add this flag to any filesystem export_operations structures. This was originally developed to allow reexporting nfs via nfsd. Other filesystems may want to consider enabling this flag too. It's hard to tell however which ones have export operations to enable export via knfsd and which ones mostly rely on them for open-by-filehandle support, so I'm leaving that up to the individual maintainers to decide. I am cc'ing the relevant lists for those filesystems that I think may want to consider adding this though. Cc: HPDD-discuss@lists.01.org Cc: ceph-devel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: fuse-devel@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- Documentation/filesystems/nfs/exporting.rst | 27 +++++++++++++++++++++ fs/nfs/export.c | 1 + fs/nfsd/nfs3xdr.c | 7 ++++-- fs/nfsd/nfsfh.c | 14 +++++++++++ fs/nfsd/nfsfh.h | 2 +- include/linux/exportfs.h | 2 ++ 6 files changed, 50 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 33d588a01ace..cbe542ad5233 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -154,6 +154,11 @@ struct which has the following members: to find potential names, and matches inode numbers to find the correct match. + flags + Some filesystems may need to be handled differently than others. The + export_operations struct also includes a flags field that allows the + filesystem to communicate such information to nfsd. See the Export + Operations Flags section below for more explanation. A filehandle fragment consists of an array of 1 or more 4byte words, together with a one byte "type". @@ -163,3 +168,25 @@ generated by encode_fh, in which case it will have been padded with nuls. Rather, the encode_fh routine should choose a "type" which indicates the decode_fh how much of the filehandle is valid, and how it should be interpreted. + +Export Operations Flags +----------------------- +In addition to the operation vector pointers, struct export_operations also +contains a "flags" field that allows the filesystem to communicate to nfsd +that it may want to do things differently when dealing with it. The +following flags are defined: + + EXPORT_OP_NOWCC - disable NFSv3 WCC attributes on this filesystem + RFC 1813 recommends that servers always send weak cache consistency + (WCC) data to the client after each operation. The server should + atomically collect attributes about the inode, do an operation on it, + and then collect the attributes afterward. This allows the client to + skip issuing GETATTRs in some situations but means that the server + is calling vfs_getattr for almost all RPCs. On some filesystems + (particularly those that are clustered or networked) this is expensive + and atomicity is difficult to guarantee. This flag indicates to nfsd + that it should skip providing WCC attributes to the client in NFSv3 + replies when doing operations on this filesystem. Consider enabling + this on filesystems that have an expensive ->getattr inode operation, + or when atomicity between pre and post operation attribute collection + is impossible to guarantee. diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 3430d6891e89..8f4c528865c5 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -171,4 +171,5 @@ const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, + .flags = EXPORT_OP_NOWCC, }; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index b0a53c857706..821db21ba072 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -206,7 +206,7 @@ static __be32 * encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; - if (dentry && d_really_is_positive(dentry)) { + if (!fhp->fh_no_wcc && dentry && d_really_is_positive(dentry)) { __be32 err; struct kstat stat; @@ -262,7 +262,7 @@ void fill_pre_wcc(struct svc_fh *fhp) bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); __be32 err; - if (fhp->fh_pre_saved) + if (fhp->fh_no_wcc || fhp->fh_pre_saved) return; inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); @@ -290,6 +290,9 @@ void fill_post_wcc(struct svc_fh *fhp) struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; + if (fhp->fh_no_wcc) + return; + if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c81dbbad8792..9c29a523f484 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -291,6 +291,16 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; + + switch (rqstp->rq_vers) { + case 3: + if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) + fhp->fh_no_wcc = true; + break; + case 2: + fhp->fh_no_wcc = true; + } + return 0; out: exp_put(exp); @@ -559,6 +569,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ set_version_and_fsid_type(fhp, exp, ref_fh); + /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */ + fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false; + if (ref_fh == fhp) fh_put(ref_fh); @@ -662,6 +675,7 @@ fh_put(struct svc_fh *fhp) exp_put(exp); fhp->fh_export = NULL; } + fhp->fh_no_wcc = false; return; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 45bd776290d5..347d10aa6265 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -35,6 +35,7 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ + bool fh_no_wcc; /* no wcc data needed */ int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ @@ -54,7 +55,6 @@ typedef struct svc_fh { struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ #endif /* CONFIG_NFSD_V3 */ - } svc_fh; #define NFSD4_FH_FOREIGN (1<<0) #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3ceb72b67a7a..e7de0103a32e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); +#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ + unsigned long flags; }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, From ba5e8187c55555519ae0b63c0fb681391bc42af9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:15 -0500 Subject: [PATCH 126/131] nfsd: allow filesystems to opt out of subtree checking When we start allowing NFS to be reexported, then we have some problems when it comes to subtree checking. In principle, we could allow it, but it would mean encoding parent info in the filehandles and there may not be enough space for that in a NFSv3 filehandle. To enforce this at export upcall time, we add a new export_ops flag that declares the filesystem ineligible for subtree checking. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- Documentation/filesystems/nfs/exporting.rst | 12 ++++++++++++ fs/nfs/export.c | 2 +- fs/nfsd/export.c | 6 ++++++ include/linux/exportfs.h | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index cbe542ad5233..960be64446cb 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -190,3 +190,15 @@ following flags are defined: this on filesystems that have an expensive ->getattr inode operation, or when atomicity between pre and post operation attribute collection is impossible to guarantee. + + EXPORT_OP_NOSUBTREECHK - disallow subtree checking on this fs + Many NFS operations deal with filehandles, which the server must then + vet to ensure that they live inside of an exported tree. When the + export consists of an entire filesystem, this is trivial. nfsd can just + ensure that the filehandle live on the filesystem. When only part of a + filesystem is exported however, then nfsd must walk the ancestors of the + inode to ensure that it's within an exported subtree. This is an + expensive operation and not all filesystems can support it properly. + This flag exempts the filesystem from subtree checking and causes + exportfs to get back an error if it tries to enable subtree checking + on it. diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 8f4c528865c5..b9ba306bf912 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -171,5 +171,5 @@ const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .flags = EXPORT_OP_NOWCC, + .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK, }; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 21e404e7cb68..81e7bb12aca6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -408,6 +408,12 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid) return -EINVAL; } + if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK && + !(*flags & NFSEXP_NOSUBTREECHECK)) { + dprintk("%s: %s does not support subtree checking!\n", + __func__, inode->i_sb->s_type->name); + return -EINVAL; + } return 0; } diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index e7de0103a32e..2fcbab0f6b61 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -214,6 +214,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); #define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ unsigned long flags; }; From 7f84b488f9add1d5cca3e6197c95914c7bd3c1cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:16 -0500 Subject: [PATCH 127/131] nfsd: close cached files prior to a REMOVE or RENAME that would replace target It's not uncommon for some workloads to do a bunch of I/O to a file and delete it just afterward. If knfsd has a cached open file however, then the file may still be open when the dentry is unlinked. If the underlying filesystem is nfs, then that could trigger it to do a sillyrename. On a REMOVE or RENAME scan the nfsd_file cache for open files that correspond to the inode, and proactively unhash and put their references. This should prevent any delete-on-last-close activity from occurring, solely due to knfsd's open file cache. This must be done synchronously though so we use the variants that call flush_delayed_fput. There are deadlock possibilities if you call flush_delayed_fput while holding locks, however. In the case of nfsd_rename, we don't even do the lookups of the dentries to be renamed until we've locked for rename. Once we've figured out what the target dentry is for a rename, check to see whether there are cached open files associated with it. If there are, then unwind all of the locking, close them all, and then reattempt the rename. None of this is really necessary for "typical" filesystems though. It's mostly of use for NFS, so declare a new export op flag and use that to determine whether to close the files beforehand. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- Documentation/filesystems/nfs/exporting.rst | 13 +++++++++++++ fs/nfs/export.c | 2 +- fs/nfsd/vfs.c | 16 +++++++++------- include/linux/exportfs.h | 5 +++-- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 960be64446cb..0e98edd353b5 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -202,3 +202,16 @@ following flags are defined: This flag exempts the filesystem from subtree checking and causes exportfs to get back an error if it tries to enable subtree checking on it. + + EXPORT_OP_CLOSE_BEFORE_UNLINK - always close cached files before unlinking + On some exportable filesystems (such as NFS) unlinking a file that + is still open can cause a fair bit of extra work. For instance, + the NFS client will do a "sillyrename" to ensure that the file + sticks around while it's still open. When reexporting, that open + file is held by nfsd so we usually end up doing a sillyrename, and + then immediately deleting the sillyrenamed file just afterward when + the link count actually goes to zero. Sometimes this delete can race + with other operations (for instance an rmdir of the parent directory). + This flag causes nfsd to close any open files for this inode _before_ + calling into the vfs to do an unlink or a rename that would replace + an existing file. diff --git a/fs/nfs/export.c b/fs/nfs/export.c index b9ba306bf912..5428713af5fe 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -171,5 +171,5 @@ const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK, + .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|EXPORT_OP_CLOSE_BEFORE_UNLINK, }; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1ecaceebee13..79cba942087e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1724,7 +1724,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct inode *fdir, *tdir; __be32 err; int host_err; - bool has_cached = false; + bool close_cached = false; err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) @@ -1783,8 +1783,9 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - if (nfsd_has_cached_files(ndentry)) { - has_cached = true; + if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && + nfsd_has_cached_files(ndentry)) { + close_cached = true; goto out_dput_old; } else { host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); @@ -1805,7 +1806,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, * as that would do the wrong thing if the two directories * were the same, so again we do it by hand. */ - if (!has_cached) { + if (!close_cached) { fill_post_wcc(ffhp); fill_post_wcc(tfhp); } @@ -1819,8 +1820,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, * shouldn't be done with locks held however, so we delay it until this * point and then reattempt the whole shebang. */ - if (has_cached) { - has_cached = false; + if (close_cached) { + close_cached = false; nfsd_close_cached_files(ndentry); dput(ndentry); goto retry; @@ -1872,7 +1873,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, type = d_inode(rdentry)->i_mode & S_IFMT; if (type != S_IFDIR) { - nfsd_close_cached_files(rdentry); + if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) + nfsd_close_cached_files(rdentry); host_err = vfs_unlink(dirp, rdentry, NULL); } else { host_err = vfs_rmdir(dirp, rdentry); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 2fcbab0f6b61..d829403ffd3b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,8 +213,9 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); -#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ -#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ +#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ +#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ unsigned long flags; }; From d045465fc6cbfa4acfb5a7d817a7c1a57a078109 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:17 -0500 Subject: [PATCH 128/131] exportfs: Add a function to return the raw output from fh_to_dentry() In order to allow nfsd to accept return values that are not acceptable to overlayfs and others, add a new function. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/exportfs/expfs.c | 32 ++++++++++++++++++++++++-------- include/linux/exportfs.h | 5 +++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 2dd55b172d57..0106eba46d5a 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -417,9 +417,11 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, } EXPORT_SYMBOL_GPL(exportfs_encode_fh); -struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, - int fh_len, int fileid_type, - int (*acceptable)(void *, struct dentry *), void *context) +struct dentry * +exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, + int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context) { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; @@ -432,10 +434,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, if (!nop || !nop->fh_to_dentry) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); - if (PTR_ERR(result) == -ENOMEM) - return ERR_CAST(result); if (IS_ERR_OR_NULL(result)) - return ERR_PTR(-ESTALE); + return result; /* * If no acceptance criteria was specified by caller, a disconnected @@ -561,10 +561,26 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, err_result: dput(result); - if (err != -ENOMEM) - err = -ESTALE; return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); + +struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, + int fh_len, int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context) +{ + struct dentry *ret; + + ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, + acceptable, context); + if (IS_ERR_OR_NULL(ret)) { + if (ret == ERR_PTR(-ENOMEM)) + return ret; + return ERR_PTR(-ESTALE); + } + return ret; +} EXPORT_SYMBOL_GPL(exportfs_decode_fh); MODULE_LICENSE("GPL"); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d829403ffd3b..846df3c96730 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -223,6 +223,11 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable); +extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, + struct fid *fid, int fh_len, + int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); From 2e19d10c1438241de32467637a2a411971547991 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:18 -0500 Subject: [PATCH 129/131] nfsd: Fix up nfsd to ensure that timeout errors don't result in ESTALE If the underlying filesystem times out, then we want knfsd to return NFSERR_JUKEBOX/DELAY rather than NFSERR_STALE. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 9c29a523f484..e80a7525561d 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -268,12 +268,20 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); else { - dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); - if (IS_ERR_OR_NULL(dentry)) + dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + if (IS_ERR_OR_NULL(dentry)) { trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, dentry ? PTR_ERR(dentry) : -ESTALE); + switch (PTR_ERR(dentry)) { + case -ENOMEM: + case -ETIMEDOUT: + break; + default: + dentry = ERR_PTR(-ESTALE); + } + } } if (dentry == NULL) goto out; From 01cbf3853959feec40ec9b9a399e12a021cd4d81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:19 -0500 Subject: [PATCH 130/131] nfsd: Set PF_LOCAL_THROTTLE on local filesystems only Don't set PF_LOCAL_THROTTLE on remote filesystems like NFS, since they aren't expected to ever be subject to double buffering. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfs/export.c | 3 ++- fs/nfsd/vfs.c | 13 +++++++++++-- include/linux/exportfs.h | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 5428713af5fe..48b879cfe6e3 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -171,5 +171,6 @@ const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|EXPORT_OP_CLOSE_BEFORE_UNLINK, + .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| + EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS, }; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 79cba942087e..04937e51de56 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -978,18 +978,25 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, __be32 *verf) { struct file *file = nf->nf_file; + struct super_block *sb = file_inode(file)->i_sb; struct svc_export *exp; struct iov_iter iter; __be32 nfserr; int host_err; int use_wgather; loff_t pos = offset; + unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; rwf_t flags = 0; + bool restore_flags = false; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); - if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) + if (sb->s_export_op) + exp_op_flags = sb->s_export_op->flags; + + if (test_bit(RQ_LOCAL, &rqstp->rq_flags) && + !(exp_op_flags & EXPORT_OP_REMOTE_FS)) { /* * We want throttling in balance_dirty_pages() * and shrink_inactive_list() to only consider @@ -998,6 +1005,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, * the client's dirty pages or its congested queue. */ current->flags |= PF_LOCAL_THROTTLE; + restore_flags = true; + } exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -1049,7 +1058,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, trace_nfsd_write_err(rqstp, fhp, offset, host_err); nfserr = nfserrno(host_err); } - if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) + if (restore_flags) current_restore_flags(pflags, PF_LOCAL_THROTTLE); return nfserr; } diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 846df3c96730..d93e8a6737bb 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -216,6 +216,7 @@ struct export_operations { #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ +#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ unsigned long flags; }; From 716a8bc7f706eeef80ab42c99d9f210eda845c81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 23:14:27 -0500 Subject: [PATCH 131/131] nfsd: Record NFSv4 pre/post-op attributes as non-atomic For the case of NFSv4, specify to the client that the pre/post-op attributes were not recorded atomically with the main operation. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfs/export.c | 3 ++- fs/nfsd/nfsfh.c | 4 ++++ fs/nfsd/nfsfh.h | 5 +++++ fs/nfsd/xdr4.h | 2 +- include/linux/exportfs.h | 3 +++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 48b879cfe6e3..7412bb164fa7 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -172,5 +172,6 @@ const struct export_operations nfs_export_ops = { .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS, + EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| + EXPORT_OP_NOATOMIC_ATTR, }; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e80a7525561d..66f2ef67792a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -301,6 +301,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_export = exp; switch (rqstp->rq_vers) { + case 4: + if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) + fhp->fh_no_atomic_attr = true; + break; case 3: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) fhp->fh_no_wcc = true; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 347d10aa6265..cb20c2cd3469 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -36,6 +36,11 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ bool fh_no_wcc; /* no wcc data needed */ + bool fh_no_atomic_attr; + /* + * wcc data is not atomic with + * operation + */ int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b4556e86e97c..a60ff5ce1a37 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -748,7 +748,7 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = (u32)fhp->fh_post_saved; + cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr); cinfo->before_change = fhp->fh_pre_change; cinfo->after_change = fhp->fh_post_change; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d93e8a6737bb..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -217,6 +217,9 @@ struct export_operations { #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ #define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ +#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply + atomic attribute updates + */ unsigned long flags; };