From 7f33b92e5b18e904a481e6e208486da43e4dc841 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Sep 2024 12:15:23 -0400 Subject: [PATCH 01/72] NFSD: Prevent a potential integer overflow If the tag length is >= U32_MAX - 3 then the "length + 4" addition can result in an integer overflow. Address this by splitting the decoding into several steps so that decode_cb_compound4res() does not have to perform arithmetic on the unsafe length value. Reported-by: Dan Carpenter Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index b5b3ab9d719a..478b548f4147 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -287,17 +287,17 @@ static int decode_cb_compound4res(struct xdr_stream *xdr, u32 length; __be32 *p; - p = xdr_inline_decode(xdr, 4 + 4); + p = xdr_inline_decode(xdr, XDR_UNIT); if (unlikely(p == NULL)) goto out_overflow; - hdr->status = be32_to_cpup(p++); + hdr->status = be32_to_cpup(p); /* Ignore the tag */ - length = be32_to_cpup(p++); - p = xdr_inline_decode(xdr, length + 4); - if (unlikely(p == NULL)) + if (xdr_stream_decode_u32(xdr, &length) < 0) + goto out_overflow; + if (xdr_inline_decode(xdr, length) == NULL) + goto out_overflow; + if (xdr_stream_decode_u32(xdr, &hdr->nops) < 0) goto out_overflow; - p += XDR_QUADLEN(length); - hdr->nops = be32_to_cpup(p); return 0; out_overflow: return -EIO; From 3c63d8946e578663b868cb9912dac616ea68bfd0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Sep 2024 12:15:29 -0400 Subject: [PATCH 02/72] svcrdma: Address an integer overflow Dan Carpenter reports: > Commit 78147ca8b4a9 ("svcrdma: Add a "parsed chunk list" data > structure") from Jun 22, 2020 (linux-next), leads to the following > Smatch static checker warning: > > net/sunrpc/xprtrdma/svc_rdma_recvfrom.c:498 xdr_check_write_chunk() > warn: potential user controlled sizeof overflow 'segcount * 4 * 4' > > net/sunrpc/xprtrdma/svc_rdma_recvfrom.c > 488 static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt) > 489 { > 490 u32 segcount; > 491 __be32 *p; > 492 > 493 if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount)) > ^^^^^^^^ > > 494 return false; > 495 > 496 /* A bogus segcount causes this buffer overflow check to fail. */ > 497 p = xdr_inline_decode(&rctxt->rc_stream, > --> 498 segcount * rpcrdma_segment_maxsz * sizeof(*p)); > > > segcount is an untrusted u32. On 32bit systems anything >= SIZE_MAX / 16 will > have an integer overflow and some those values will be accepted by > xdr_inline_decode(). Reported-by: Dan Carpenter Fixes: 78147ca8b4a9 ("svcrdma: Add a "parsed chunk list" data structure") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ae3fb9bc8a21..292022f0976e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -493,7 +493,13 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt) if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount)) return false; - /* A bogus segcount causes this buffer overflow check to fail. */ + /* Before trusting the segcount value enough to use it in + * a computation, perform a simple range check. This is an + * arbitrary but sensible limit (ie, not architectural). + */ + if (unlikely(segcount > RPCSVC_MAXPAGES)) + return false; + p = xdr_inline_decode(&rctxt->rc_stream, segcount * rpcrdma_segment_maxsz * sizeof(*p)); return p != NULL; From b7165ab074b8cd592dcd9304802ee1d999494c6d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 23 Sep 2024 10:05:46 +0200 Subject: [PATCH 03/72] NFSD: Remove unnecessary posix_acl_entry pointer initialization The posix_acl_entry pointer pe is already initialized by the FOREACH_ACL_ENTRY() macro. Remove the unnecessary initialization. Signed-off-by: Thorsten Blum Signed-off-by: Chuck Lever --- fs/nfsd/nfs4acl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 96e786b5e544..936ea1ad9586 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -198,8 +198,6 @@ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) memset(pas, 0, sizeof(*pas)); pas->mask = 07; - pe = acl->a_entries + acl->a_count; - FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: From 612196ef5c50bdafe34314e36b085d4843fb2e9d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 12:29:43 -0400 Subject: [PATCH 04/72] NFSD: Remove unused function parameter Clean up: Commit 65294c1f2c5e ("nfsd: add a new struct file caching facility to nfsd") moved the fh_verify() call site out of nfsd_open(). That was the only user of nfsd_open's @rqstp parameter, so that parameter can be removed. Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 3 +-- fs/nfsd/vfs.c | 11 ++++------- fs/nfsd/vfs.h | 4 ++-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 2e6783f63712..1408166222c5 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1123,8 +1123,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, status = nfs_ok; trace_nfsd_file_opened(nf, status); } else { - ret = nfsd_open_verified(rqstp, fhp, may_flags, - &nf->nf_file); + ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file); if (ret == -EOPENSTALE && stale_retry) { stale_retry = false; nfsd_file_unhash(nf); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d6d4f2a0e898..e0692401da33 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -861,8 +861,7 @@ int nfsd_open_break_lease(struct inode *inode, int access) * N.B. After this call fhp needs an fh_put */ static int -__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int may_flags, struct file **filp) +__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { struct path path; struct inode *inode; @@ -932,7 +931,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, retry: err = fh_verify(rqstp, fhp, type, may_flags); if (!err) { - host_err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + host_err = __nfsd_open(fhp, type, may_flags, filp); if (host_err == -EOPENSTALE && !retried) { retried = true; fh_put(fhp); @@ -945,7 +944,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, /** * nfsd_open_verified - Open a regular file for the filecache - * @rqstp: RPC request * @fhp: NFS filehandle of the file to open * @may_flags: internal permission flags * @filp: OUT: open "struct file *" @@ -953,10 +951,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * Returns zero on success, or a negative errno value. */ int -nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, - struct file **filp) +nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp) { - return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); + return __nfsd_open(fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 3ff146522556..854fb95dfdca 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -114,8 +114,8 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, - int may_flags, struct file **filp); +int nfsd_open_verified(struct svc_fh *fhp, int may_flags, + struct file **filp); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, From d86fca3affca04b6c2cedd7060206c3e7091ecc8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:11 -0400 Subject: [PATCH 05/72] xdrgen: Exit status should be zero on success To use xdrgen in Makefiles, it needs to exit with a zero status if the compilation worked. Otherwise the make command fails with an error. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdrgen | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen index 95f303b2861b..43762be39252 100755 --- a/tools/net/sunrpc/xdrgen/xdrgen +++ b/tools/net/sunrpc/xdrgen/xdrgen @@ -128,5 +128,7 @@ There is NO WARRANTY, to the extent permitted by law.""", try: if __name__ == "__main__": sys.exit(main()) -except (SystemExit, KeyboardInterrupt, BrokenPipeError): +except SystemExit: + sys.exit(0) +except (KeyboardInterrupt, BrokenPipeError): sys.exit(1) From 5383ccd0cc23530b69a0822fba54605615b71946 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:12 -0400 Subject: [PATCH 06/72] xdrgen: Clean up type_specifier Clean up: Make both arms of the type_specifier AST transformer match. No behavior change is expected. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index dbd3fcf9c957..5d96c544a07b 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -303,9 +303,9 @@ class ParseToAst(Transformer): c_classifier=c_classifier, ) - token = children[0].data + name = children[0].data.value return _XdrBuiltInType( - type_name=token.value, + type_name=name, c_classifier=c_classifier, ) From 041962d5c6a965f1a6c338be49acfe7ab51d2056 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:13 -0400 Subject: [PATCH 07/72] xdrgen: Rename "variable-length strings" I misread RFC 4506. The built-in data type is called simply "string", as there is no fixed-length variety. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/generators/pointer.py | 8 ++++---- tools/net/sunrpc/xdrgen/generators/struct.py | 8 ++++---- tools/net/sunrpc/xdrgen/generators/typedef.py | 10 +++++----- tools/net/sunrpc/xdrgen/grammars/xdr.lark | 2 +- .../decoder/{variable_length_string.j2 => string.j2} | 0 .../{variable_length_string.j2 => string.j2} | 0 .../encoder/{variable_length_string.j2 => string.j2} | 0 .../decoder/{variable_length_string.j2 => string.j2} | 0 .../{variable_length_string.j2 => string.j2} | 0 .../encoder/{variable_length_string.j2 => string.j2} | 0 .../{variable_length_string.j2 => string.j2} | 0 .../decoder/{variable_length_string.j2 => string.j2} | 0 .../{variable_length_string.j2 => string.j2} | 0 .../encoder/{variable_length_string.j2 => string.j2} | 0 .../decoder/{variable_length_string.j2 => string.j2} | 0 tools/net/sunrpc/xdrgen/xdr_ast.py | 10 +++++----- 16 files changed, 19 insertions(+), 19 deletions(-) rename tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/pointer/definition/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/struct/decoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/struct/definition/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/struct/encoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/typedef/definition/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/{variable_length_string.j2 => string.j2} (100%) rename tools/net/sunrpc/xdrgen/templates/C/union/decoder/{variable_length_string.j2 => string.j2} (100%) diff --git a/tools/net/sunrpc/xdrgen/generators/pointer.py b/tools/net/sunrpc/xdrgen/generators/pointer.py index b0b27f1819c8..0aa3d35203f5 100644 --- a/tools/net/sunrpc/xdrgen/generators/pointer.py +++ b/tools/net/sunrpc/xdrgen/generators/pointer.py @@ -8,7 +8,7 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrPointer, _XdrDeclaration @@ -46,7 +46,7 @@ def emit_pointer_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_pointer_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_pointer_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( diff --git a/tools/net/sunrpc/xdrgen/generators/struct.py b/tools/net/sunrpc/xdrgen/generators/struct.py index b694cd470829..6dd7f4d7cd53 100644 --- a/tools/net/sunrpc/xdrgen/generators/struct.py +++ b/tools/net/sunrpc/xdrgen/generators/struct.py @@ -8,7 +8,7 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrStruct, _XdrDeclaration @@ -46,7 +46,7 @@ def emit_struct_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_struct_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_struct_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( diff --git a/tools/net/sunrpc/xdrgen/generators/typedef.py b/tools/net/sunrpc/xdrgen/generators/typedef.py index 85a1b2303333..6ea98445f5c8 100644 --- a/tools/net/sunrpc/xdrgen/generators/typedef.py +++ b/tools/net/sunrpc/xdrgen/generators/typedef.py @@ -8,7 +8,7 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrTypedef, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrTypedef, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrVoid, _XdrDeclaration @@ -28,7 +28,7 @@ def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "declaration", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -74,7 +74,7 @@ def emit_type_definition(environment: Environment, node: _XdrDeclaration) -> Non classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "definition", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -119,7 +119,7 @@ def emit_typedef_decoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "decoder", node.template) print( template.render( @@ -180,7 +180,7 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "encoder", node.template) print( template.render( diff --git a/tools/net/sunrpc/xdrgen/grammars/xdr.lark b/tools/net/sunrpc/xdrgen/grammars/xdr.lark index f3c4552e548d..0e1aeb02d667 100644 --- a/tools/net/sunrpc/xdrgen/grammars/xdr.lark +++ b/tools/net/sunrpc/xdrgen/grammars/xdr.lark @@ -3,7 +3,7 @@ declaration : "opaque" identifier "[" value "]" -> fixed_length_opaque | "opaque" identifier "<" [ value ] ">" -> variable_length_opaque - | "string" identifier "<" [ value ] ">" -> variable_length_string + | "string" identifier "<" [ value ] ">" -> string | type_specifier identifier "[" value "]" -> fixed_length_array | type_specifier identifier "<" [ value ] ">" -> variable_length_array | type_specifier "*" identifier -> optional_data diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 rename to tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 5d96c544a07b..17d1689b5858 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -88,12 +88,12 @@ class _XdrVariableLengthOpaque(_XdrDeclaration): @dataclass -class _XdrVariableLengthString(_XdrDeclaration): +class _XdrString(_XdrDeclaration): """A (NUL-terminated) variable-length string declaration""" name: str maxsize: str - template: str = "variable_length_string" + template: str = "string" @dataclass @@ -350,15 +350,15 @@ class ParseToAst(Transformer): return _XdrVariableLengthOpaque(name, maxsize) - def variable_length_string(self, children): - """Instantiate one _XdrVariableLengthString declaration object""" + def string(self, children): + """Instantiate one _XdrString declaration object""" name = children[0].symbol if children[1] is not None: maxsize = children[1].value else: maxsize = "0" - return _XdrVariableLengthString(name, maxsize) + return _XdrString(name, maxsize) def fixed_length_array(self, children): """Instantiate one _XdrFixedLengthArray declaration object""" From c060f8168bdf22aa986970955af99702d142dfbe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:14 -0400 Subject: [PATCH 08/72] xdrgen: Rename enum's declaration Jinja2 template "close.j2" is a confusing name. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/generators/enum.py | 2 +- .../xdrgen/templates/C/enum/declaration/{close.j2 => enum.j2} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/net/sunrpc/xdrgen/templates/C/enum/declaration/{close.j2 => enum.j2} (100%) diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index 855e43f4ae38..e37b5c297821 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -18,7 +18,7 @@ class XdrEnumGenerator(SourceGenerator): def emit_declaration(self, node: _XdrEnum) -> None: """Emit one declaration pair for an XDR enum type""" if node.name in public_apis: - template = self.environment.get_template("declaration/close.j2") + template = self.environment.get_template("declaration/enum.j2") print(template.render(name=node.name)) def emit_definition(self, node: _XdrEnum) -> None: diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 similarity index 100% rename from tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 rename to tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 From 6e853dcd2d3d6f796597c1042340a2de0ce2469f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:15 -0400 Subject: [PATCH 09/72] xdrgen: Rename "enum yada" types as just "yada" This simplifies the generated C code and makes way for supporting big-endian XDR enums. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 | 4 ++-- tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 | 2 +- tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 | 1 + tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 | 2 +- tools/net/sunrpc/xdrgen/xdr_ast.py | 4 ---- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 index ab1e576c9531..d1405c7c5354 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 @@ -1,4 +1,4 @@ {# SPDX-License-Identifier: GPL-2.0 #} -bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr); -bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value); +bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr); +bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 index 341d829afeda..6482984f1cb7 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr) +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) { u32 val; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 index 9e62344a976a..a07586cbee17 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 @@ -1,2 +1,3 @@ {# SPDX-License-Identifier: GPL-2.0 #} }; +typedef enum {{ name }} {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 index bd0a770e50f2..67245b9a914d 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value) +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) { return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; } diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 17d1689b5858..576e1ecfe1d7 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -15,7 +15,6 @@ this_module = sys.modules[__name__] excluded_apis = [] header_name = "none" public_apis = [] -enums = set() structs = set() pass_by_reference = set() @@ -294,8 +293,6 @@ class ParseToAst(Transformer): c_classifier = "" if isinstance(children[0], _XdrIdentifier): name = children[0].symbol - if name in enums: - c_classifier = "enum " if name in structs: c_classifier = "struct " return _XdrDefinedType( @@ -320,7 +317,6 @@ class ParseToAst(Transformer): def enum(self, children): """Instantiate one _XdrEnum object""" enum_name = children[0].symbol - enums.add(enum_name) i = 0 enumerators = [] From b376d519bd142c65ba9bba35db12b6be95b46893 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Sep 2024 20:50:16 -0400 Subject: [PATCH 10/72] xdrgen: Implement big-endian enums Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 ++++++++++++ tools/net/sunrpc/xdrgen/README | 17 ++++++++++ tools/net/sunrpc/xdrgen/generators/enum.py | 17 +++++++--- tools/net/sunrpc/xdrgen/generators/union.py | 34 ++++++++++++++----- tools/net/sunrpc/xdrgen/grammars/xdr.lark | 4 ++- .../templates/C/enum/decoder/enum_be.j2 | 14 ++++++++ .../templates/C/enum/definition/close_be.j2 | 3 ++ .../templates/C/enum/encoder/enum_be.j2 | 14 ++++++++ .../templates/C/union/decoder/case_spec_be.j2 | 2 ++ .../templates/C/union/encoder/case_spec_be.j2 | 2 ++ tools/net/sunrpc/xdrgen/xdr_ast.py | 3 ++ 11 files changed, 117 insertions(+), 14 deletions(-) create mode 100644 tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5f775e104f9a..a2ab813a9800 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -680,6 +680,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } +/** + * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = *p; + return 0; +} + /** * xdr_stream_decode_u64 - Decode a 64-bit integer * @xdr: pointer to xdr_stream diff --git a/tools/net/sunrpc/xdrgen/README b/tools/net/sunrpc/xdrgen/README index 92f7738ad50c..27218a78ab40 100644 --- a/tools/net/sunrpc/xdrgen/README +++ b/tools/net/sunrpc/xdrgen/README @@ -150,6 +150,23 @@ Pragma directives specify exceptions to the normal generation of encoding and decoding functions. Currently one directive is implemented: "public". +Pragma big_endian +------ ---------- + + pragma big_endian ; + +For variables that might contain only a small number values, it +is more efficient to avoid the byte-swap when encoding or decoding +on little-endian machines. Such is often the case with error status +codes. For example: + + pragma big_endian nfsstat3; + +In this case, when generating an XDR struct or union containing a +field of type "nfsstat3", xdrgen will make the type of that field +"__be32" instead of "enum nfsstat3". XDR unions then switch on the +non-byte-swapped value of that field. + Pragma exclude ------ ------- diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index e37b5c297821..e63f45b8eb74 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -4,7 +4,7 @@ """Generate code to handle XDR enum types""" from generators import SourceGenerator, create_jinja2_environment -from xdr_ast import _XdrEnum, public_apis +from xdr_ast import _XdrEnum, public_apis, big_endian class XdrEnumGenerator(SourceGenerator): @@ -30,15 +30,24 @@ class XdrEnumGenerator(SourceGenerator): for enumerator in node.enumerators: print(template.render(name=enumerator.name, value=enumerator.value)) - template = self.environment.get_template("definition/close.j2") + if node.name in big_endian: + template = self.environment.get_template("definition/close_be.j2") + else: + template = self.environment.get_template("definition/close.j2") print(template.render(name=node.name)) def emit_decoder(self, node: _XdrEnum) -> None: """Emit one decoder function for an XDR enum type""" - template = self.environment.get_template("decoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("decoder/enum_be.j2") + else: + template = self.environment.get_template("decoder/enum.j2") print(template.render(name=node.name)) def emit_encoder(self, node: _XdrEnum) -> None: """Emit one encoder function for an XDR enum type""" - template = self.environment.get_template("encoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("encoder/enum_be.j2") + else: + template = self.environment.get_template("encoder/enum.j2") print(template.render(name=node.name)) diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py index 7974967bbb9f..4522a5b7a943 100644 --- a/tools/net/sunrpc/xdrgen/generators/union.py +++ b/tools/net/sunrpc/xdrgen/generators/union.py @@ -8,7 +8,7 @@ from jinja2 import Environment from generators import SourceGenerator from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid +from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, big_endian from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis @@ -77,13 +77,18 @@ def emit_union_switch_spec_decoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_decoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_decoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit decoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "decoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "decoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "decoder", "case_spec") for case in node.values: print(template.render(case=case)) @@ -136,7 +141,11 @@ def emit_union_decoder(environment: Environment, node: _XdrUnion) -> None: emit_union_switch_spec_decoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_decoder(environment, case) + emit_union_case_spec_decoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_decoder(environment, node) @@ -153,17 +162,21 @@ def emit_union_switch_spec_encoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_encoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_encoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit encoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "encoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "encoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "encoder", "case_spec") for case in node.values: print(template.render(case=case)) - assert isinstance(node.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", node.arm.template) print( template.render( @@ -192,7 +205,6 @@ def emit_union_default_spec_encoder(environment: Environment, node: _XdrUnion) - print(template.render()) return - assert isinstance(default_case.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", default_case.arm.template) print( template.render( @@ -210,7 +222,11 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: emit_union_switch_spec_encoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_encoder(environment, case) + emit_union_case_spec_encoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_encoder(environment, node) diff --git a/tools/net/sunrpc/xdrgen/grammars/xdr.lark b/tools/net/sunrpc/xdrgen/grammars/xdr.lark index 0e1aeb02d667..7c2c1b8c86d1 100644 --- a/tools/net/sunrpc/xdrgen/grammars/xdr.lark +++ b/tools/net/sunrpc/xdrgen/grammars/xdr.lark @@ -87,12 +87,14 @@ procedure_def : type_specifier identifier "(" type_specifier ")" "=" c pragma_def : "pragma" directive identifier [ identifier ] ";" -directive : exclude_directive +directive : big_endian_directive + | exclude_directive | header_directive | pages_directive | public_directive | skip_directive +big_endian_directive : "big_endian" exclude_directive : "exclude" header_directive : "header" pages_directive : "pages" diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 new file mode 100644 index 000000000000..44c391c10b42 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) +{ + return xdr_stream_decode_be32(xdr, ptr) == 0; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 new file mode 100644 index 000000000000..2c18948bddf7 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +}; +typedef __be32 {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 new file mode 100644 index 000000000000..fbbcc45948d6 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) +{ + return xdr_stream_encode_be32(xdr, value) == XDR_UNIT; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 576e1ecfe1d7..d5f0535ec84c 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -12,6 +12,7 @@ from lark.tree import Meta this_module = sys.modules[__name__] +big_endian = [] excluded_apis = [] header_name = "none" public_apis = [] @@ -480,6 +481,8 @@ class ParseToAst(Transformer): """Instantiate one _Pragma object""" directive = children[0].children[0].data match directive: + case "big_endian_directive": + big_endian.append(children[1].symbol) case "exclude_directive": excluded_apis.append(children[1].symbol) case "header_directive": From 1acd13cbc7c9c69a09e5d8325cf6c3e3f0a75049 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:31 -0400 Subject: [PATCH 11/72] xdrgen: Refactor transformer arms Clean up: Add a __post_init__ function to the data classes that need to update the "structs" and "pass_by_reference" sets. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 57 +++++++++++++++++------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index d5f0535ec84c..68f09945f2c4 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -51,13 +51,17 @@ class _XdrTypeSpecifier(_XdrAst): """Corresponds to 'type_specifier' in the XDR language grammar""" type_name: str - c_classifier: str + c_classifier: str = "" @dataclass class _XdrDefinedType(_XdrTypeSpecifier): """Corresponds to a type defined by the input specification""" + def __post_init__(self): + if self.type_name in structs: + self.c_classifier = "struct " + @dataclass class _XdrBuiltInType(_XdrTypeSpecifier): @@ -124,6 +128,10 @@ class _XdrOptionalData(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "optional_data" + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + @dataclass class _XdrBasic(_XdrDeclaration): @@ -174,6 +182,10 @@ class _XdrStruct(_XdrAst): name: str fields: List[_XdrDeclaration] + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + @dataclass class _XdrPointer(_XdrAst): @@ -182,6 +194,10 @@ class _XdrPointer(_XdrAst): name: str fields: List[_XdrDeclaration] + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + @dataclass class _XdrTypedef(_XdrAst): @@ -189,6 +205,15 @@ class _XdrTypedef(_XdrAst): declaration: _XdrDeclaration + def __post_init__(self): + if not isinstance(self.declaration, _XdrBasic): + return + + new_type = self.declaration + if isinstance(new_type.spec, _XdrDefinedType): + if new_type.spec.type_name in pass_by_reference: + pass_by_reference.add(new_type.name) + @dataclass class _XdrCaseSpec(_XdrAst): @@ -216,6 +241,10 @@ class _XdrUnion(_XdrAst): cases: List[_XdrCaseSpec] default: _XdrDeclaration + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + @dataclass class _RpcProcedure(_XdrAst): @@ -290,22 +319,13 @@ class ParseToAst(Transformer): return _XdrConstantValue(value) def type_specifier(self, children): - """Instantiate one type_specifier object""" - c_classifier = "" + """Instantiate one _XdrTypeSpecifier object""" if isinstance(children[0], _XdrIdentifier): name = children[0].symbol - if name in structs: - c_classifier = "struct " - return _XdrDefinedType( - type_name=name, - c_classifier=c_classifier, - ) + return _XdrDefinedType(type_name=name) name = children[0].data.value - return _XdrBuiltInType( - type_name=name, - c_classifier=c_classifier, - ) + return _XdrBuiltInType(type_name=name) def constant_def(self, children): """Instantiate one _XdrConstant object""" @@ -380,8 +400,6 @@ class ParseToAst(Transformer): """Instantiate one _XdrOptionalData declaration object""" spec = children[0] name = children[1].symbol - structs.add(name) - pass_by_reference.add(name) return _XdrOptionalData(name, spec) @@ -400,8 +418,6 @@ class ParseToAst(Transformer): def struct(self, children): """Instantiate one _XdrStruct object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) fields = children[1].children last_field = fields[-1] @@ -416,11 +432,6 @@ class ParseToAst(Transformer): def typedef(self, children): """Instantiate one _XdrTypedef object""" new_type = children[0] - if isinstance(new_type, _XdrBasic) and isinstance( - new_type.spec, _XdrDefinedType - ): - if new_type.spec.type_name in pass_by_reference: - pass_by_reference.add(new_type.name) return _XdrTypedef(new_type) @@ -442,8 +453,6 @@ class ParseToAst(Transformer): def union(self, children): """Instantiate one _XdrUnion object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) body = children[1] discriminant = body.children[0].children[0] From 189f55d93d3eb76d733c28f0c70fd2d162a9ffc5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:32 -0400 Subject: [PATCH 12/72] xdrgen: Track constant values In order to compute the numeric on-the-wire width of XDR types, xdrgen needs to keep track of the numeric value of constants that are defined in the input specification so it can perform calculations with those values. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 68f09945f2c4..b7df45f47707 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -19,6 +19,8 @@ public_apis = [] structs = set() pass_by_reference = set() +constants = {} + @dataclass class _XdrAst(ast_utils.Ast): @@ -156,6 +158,10 @@ class _XdrConstant(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnumerator(_XdrAst): @@ -164,6 +170,10 @@ class _XdrEnumerator(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnum(_XdrAst): From 631c2925bae41c11dcf3915a2ab5f3be9af54277 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:33 -0400 Subject: [PATCH 13/72] xdrgen: Keep track of on-the-wire data type widths The generic parts of the RPC layer need to know the widths (in XDR_UNIT increments) of the XDR data types defined for each protocol. As a first step, add dictionaries to keep track of the symbolic and actual maximum XDR width of XDR types. This makes it straightforward to look up the width of a type by its name. The built-in dictionaries are pre-loaded with the widths of the built-in XDR types as defined in RFC 4506. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdrgen/_defs.h | 9 ++++++ tools/net/sunrpc/xdrgen/xdr_ast.py | 43 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h index be9e62371758..20c7270aa64d 100644 --- a/include/linux/sunrpc/xdrgen/_defs.h +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -23,4 +23,13 @@ typedef struct { u8 *data; } opaque; +#define XDR_void (0) +#define XDR_bool (1) +#define XDR_int (1) +#define XDR_unsigned_int (1) +#define XDR_long (1) +#define XDR_unsigned_long (1) +#define XDR_hyper (2) +#define XDR_unsigned_hyper (2) + #endif /* _SUNRPC_XDRGEN__DEFS_H_ */ diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index b7df45f47707..f1d93a1d0ed8 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -21,6 +21,31 @@ pass_by_reference = set() constants = {} +symbolic_widths = { + "void": ["XDR_void"], + "bool": ["XDR_bool"], + "int": ["XDR_int"], + "unsigned_int": ["XDR_unsigned_int"], + "long": ["XDR_long"], + "unsigned_long": ["XDR_unsigned_long"], + "hyper": ["XDR_hyper"], + "unsigned_hyper": ["XDR_unsigned_hyper"], +} + +# Numeric XDR widths are tracked in a dictionary that is keyed +# by type_name because sometimes a caller has nothing more than +# the type_name to use to figure out the numeric width. +max_widths = { + "void": 0, + "bool": 1, + "int": 1, + "unsigned_int": 1, + "long": 1, + "unsigned_long": 1, + "hyper": 2, + "unsigned_hyper": 2, +} + @dataclass class _XdrAst(ast_utils.Ast): @@ -60,15 +85,24 @@ class _XdrTypeSpecifier(_XdrAst): class _XdrDefinedType(_XdrTypeSpecifier): """Corresponds to a type defined by the input specification""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [get_header_name().upper() + "_" + self.type_name + "_sz"] + def __post_init__(self): if self.type_name in structs: self.c_classifier = "struct " + symbolic_widths[self.type_name] = self.symbolic_width() @dataclass class _XdrBuiltInType(_XdrTypeSpecifier): """Corresponds to a built-in XDR type""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.type_name] + @dataclass class _XdrDeclaration(_XdrAst): @@ -148,8 +182,17 @@ class _XdrBasic(_XdrDeclaration): class _XdrVoid(_XdrDeclaration): """A void declaration""" + name: str = "void" template: str = "void" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 0 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [] + @dataclass class _XdrConstant(_XdrAst): From 3f890755c8f5958ef537a6d8f14de5ec4bfdc3fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:34 -0400 Subject: [PATCH 14/72] xdrgen: XDR widths for enum types RFC 4506 says that an XDR enum is represented as a signed integer on the wire; thus its width is 1 XDR_UNIT. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index f1d93a1d0ed8..fbee954c7f70 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -227,6 +227,18 @@ class _XdrEnum(_XdrAst): maximum: int enumerators: List[_XdrEnumerator] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_int"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrStruct(_XdrAst): From 16c98ce04a6929019f66dab40367fb14d0afc678 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:35 -0400 Subject: [PATCH 15/72] xdrgen: XDR width for fixed-length opaque The XDR width for a fixed-length opaque is the byte size of the opaque rounded up to the next XDR_UNIT, divided by XDR_UNIT. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index fbee954c7f70..9fe7fa688caa 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -21,6 +21,16 @@ pass_by_reference = set() constants = {} + +def xdr_quadlen(val: str) -> int: + """Return integer XDR width of an XDR type""" + if val in constants: + octets = constants[val] + else: + octets = int(val) + return int((octets + 3) / 4) + + symbolic_widths = { "void": ["XDR_void"], "bool": ["XDR_bool"], @@ -117,6 +127,18 @@ class _XdrFixedLengthOpaque(_XdrDeclaration): size: str template: str = "fixed_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_QUADLEN(" + self.size + ")"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthOpaque(_XdrDeclaration): From b0b85ef754740102cd659aea10aa516fe27f6b36 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:36 -0400 Subject: [PATCH 16/72] xdrgen: XDR width for variable-length opaque The byte size of a variable-length opaque is conveyed in an unsigned integer. If there is a specified maximum size, that is included in the type's widths list. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 9fe7fa688caa..94cdcfb36e77 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -148,6 +148,21 @@ class _XdrVariableLengthOpaque(_XdrDeclaration): maxsize: str template: str = "variable_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrString(_XdrDeclaration): From da298d01136e2f80a1a3a47f81d8bb3ade2d306c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:37 -0400 Subject: [PATCH 17/72] xdrgen: XDR width for a string A string works like a variable-length opaque. See Section 4.11 of RFC 4506. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 94cdcfb36e77..d5f48c094729 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -172,6 +172,21 @@ class _XdrString(_XdrDeclaration): maxsize: str template: str = "string" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrFixedLengthArray(_XdrDeclaration): From 59b01b9636646bbf2eee59e19cc7da5b584f24c7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:38 -0400 Subject: [PATCH 18/72] xdrgen: XDR width for fixed-length array Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index d5f48c094729..e9bc81e83b48 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -197,6 +197,19 @@ class _XdrFixedLengthArray(_XdrDeclaration): size: str template: str = "fixed_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) * max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + return ["(" + self.size + " * (" + item_width + "))"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthArray(_XdrDeclaration): From 2db8940e6ceda6aeb566429e8d58c34ab093d3c7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:39 -0400 Subject: [PATCH 19/72] xdrgen: XDR width for variable-length array Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index e9bc81e83b48..cb89d5d9987c 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -220,6 +220,22 @@ class _XdrVariableLengthArray(_XdrDeclaration): maxsize: str template: str = "variable_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + (xdr_quadlen(self.maxsize) * max_widths[self.spec.type_name]) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + widths.append("(" + self.maxsize + " * (" + item_width + "))") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrOptionalData(_XdrDeclaration): From dc6fa83b6aff5c50277045f53a448afce9616b07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:40 -0400 Subject: [PATCH 20/72] xdrgen: XDR width for optional_data type Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index cb89d5d9987c..f2ef78654e36 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -245,9 +245,19 @@ class _XdrOptionalData(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "optional_data" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_bool"] + def __post_init__(self): structs.add(self.name) pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass From 2852c92ba1305fd2d85fd69f73bb4b43a3c58146 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:41 -0400 Subject: [PATCH 21/72] xdrgen: XDR width for typedef The XDR width of a typedef is the same as the width of the base type. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 34 ++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index f2ef78654e36..8996f26cbd55 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -268,6 +268,18 @@ class _XdrBasic(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "basic" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.spec.type_name] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVoid(_XdrDeclaration): @@ -361,14 +373,22 @@ class _XdrTypedef(_XdrAst): declaration: _XdrDeclaration - def __post_init__(self): - if not isinstance(self.declaration, _XdrBasic): - return + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return self.declaration.max_width() - new_type = self.declaration - if isinstance(new_type.spec, _XdrDefinedType): - if new_type.spec.type_name in pass_by_reference: - pass_by_reference.add(new_type.name) + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return self.declaration.symbolic_width() + + def __post_init__(self): + if isinstance(self.declaration, _XdrBasic): + new_type = self.declaration + if isinstance(new_type.spec, _XdrDefinedType): + if new_type.spec.type_name in pass_by_reference: + pass_by_reference.add(new_type.name) + max_widths[new_type.name] = self.max_width() + symbolic_widths[new_type.name] = self.symbolic_width() @dataclass From f4bc1e996a34a47f6c8334edcd8ddcd7dc0634b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:42 -0400 Subject: [PATCH 22/72] xdrgen: XDR width for struct types The XDR width of a struct type is the sum of the widths of each of the struct's fields. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 8996f26cbd55..f34b147c8dfd 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -350,9 +350,25 @@ class _XdrStruct(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 0 + for field in self.fields: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + for field in self.fields: + widths += field.symbolic_width() + return widths + def __post_init__(self): structs.add(self.name) pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass From 447dc1efebac1484d5903ba34655289e7725df6d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:43 -0400 Subject: [PATCH 23/72] xdrgen: XDR width for pointer types The XDR width of a pointer type is the sum of the widths of each of the struct's fields, except for the last field. The width of the implicit boolean "value follows" field is added as well. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index f34b147c8dfd..8d53c889eee8 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -378,9 +378,26 @@ class _XdrPointer(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 1 + for field in self.fields[0:-1]: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + widths += ["XDR_bool"] + for field in self.fields[0:-1]: + widths += field.symbolic_width() + return widths + def __post_init__(self): structs.add(self.name) pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass From ce5a75d9939fab904d27b403011eddd2b173b495 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:44 -0400 Subject: [PATCH 24/72] xdrgen: XDR width for union types Not yet complete. The tool doesn't do any math yet. Thus, even though the maximum XDR width of a union is the width of the union enumerator plus the width of its largest arm, we're using the sum of all the elements of the union for the moment. This means that buffer size requirements are overestimated, and that the generated maxsize macro cannot yet be used for determining data element alignment in the XDR buffer. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/xdr_ast.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index 8d53c889eee8..5233e73c7046 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -450,9 +450,35 @@ class _XdrUnion(_XdrAst): cases: List[_XdrCaseSpec] default: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + return 1 + max_width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + width = case.arm.symbolic_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + width = self.default.arm.symbolic_width() + return symbolic_widths[self.discriminant.name] + width + def __post_init__(self): structs.add(self.name) pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass From e9e1e7e75acd737cf41c6ee64d62da6ea0c10036 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:45 -0400 Subject: [PATCH 25/72] xdrgen: Add generator code for XDR width macros Introduce logic in the code generators to emit maxsize (XDR width) definitions. In C, these are pre-processor macros. Signed-off-by: Chuck Lever --- .../net/sunrpc/xdrgen/generators/__init__.py | 4 ++++ tools/net/sunrpc/xdrgen/generators/enum.py | 13 +++++++++++- tools/net/sunrpc/xdrgen/generators/pointer.py | 18 ++++++++++++++++- tools/net/sunrpc/xdrgen/generators/struct.py | 18 ++++++++++++++++- tools/net/sunrpc/xdrgen/generators/typedef.py | 18 ++++++++++++++++- tools/net/sunrpc/xdrgen/generators/union.py | 20 +++++++++++++++++-- .../xdrgen/templates/C/enum/maxsize/enum.j2 | 2 ++ .../templates/C/pointer/maxsize/pointer.j2 | 3 +++ .../templates/C/struct/maxsize/struct.j2 | 3 +++ .../templates/C/typedef/maxsize/basic.j2 | 3 +++ .../C/typedef/maxsize/fixed_length_opaque.j2 | 2 ++ .../templates/C/typedef/maxsize/string.j2 | 2 ++ .../typedef/maxsize/variable_length_array.j2 | 2 ++ .../typedef/maxsize/variable_length_opaque.j2 | 2 ++ .../xdrgen/templates/C/union/maxsize/union.j2 | 3 +++ 15 files changed, 107 insertions(+), 6 deletions(-) create mode 100644 tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 create mode 100644 tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py index fd2457461274..b98574a36a4a 100644 --- a/tools/net/sunrpc/xdrgen/generators/__init__.py +++ b/tools/net/sunrpc/xdrgen/generators/__init__.py @@ -111,3 +111,7 @@ class SourceGenerator: def emit_encoder(self, node: _XdrAst) -> None: """Emit one encoder function for this XDR type""" raise NotImplementedError("Encoder generation not supported") + + def emit_maxsize(self, node: _XdrAst) -> None: + """Emit one maxsize macro for this XDR type""" + raise NotImplementedError("Maxsize macro generation not supported") diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index e63f45b8eb74..e62f715d3996 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -4,7 +4,7 @@ """Generate code to handle XDR enum types""" from generators import SourceGenerator, create_jinja2_environment -from xdr_ast import _XdrEnum, public_apis, big_endian +from xdr_ast import _XdrEnum, public_apis, big_endian, get_header_name class XdrEnumGenerator(SourceGenerator): @@ -51,3 +51,14 @@ class XdrEnumGenerator(SourceGenerator): else: template = self.environment.get_template("encoder/enum.j2") print(template.render(name=node.name)) + + def emit_maxsize(self, node: _XdrEnum) -> None: + """Emit one maxsize macro for an XDR enum type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = self.environment.get_template("maxsize/enum.j2") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) diff --git a/tools/net/sunrpc/xdrgen/generators/pointer.py b/tools/net/sunrpc/xdrgen/generators/pointer.py index 0aa3d35203f5..6dbda60ad2db 100644 --- a/tools/net/sunrpc/xdrgen/generators/pointer.py +++ b/tools/net/sunrpc/xdrgen/generators/pointer.py @@ -12,7 +12,7 @@ from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrPointer, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_pointer_declaration(environment: Environment, node: _XdrPointer) -> None: @@ -247,6 +247,18 @@ def emit_pointer_encoder(environment: Environment, node: _XdrPointer) -> None: print(template.render()) +def emit_pointer_maxsize(environment: Environment, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "pointer") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrPointerGenerator(SourceGenerator): """Generate source code for XDR pointer""" @@ -270,3 +282,7 @@ class XdrPointerGenerator(SourceGenerator): def emit_encoder(self, node: _XdrPointer) -> None: """Emit one encoder function for an XDR pointer type""" emit_pointer_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + emit_pointer_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/struct.py b/tools/net/sunrpc/xdrgen/generators/struct.py index 6dd7f4d7cd53..64911de46f62 100644 --- a/tools/net/sunrpc/xdrgen/generators/struct.py +++ b/tools/net/sunrpc/xdrgen/generators/struct.py @@ -12,7 +12,7 @@ from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrStruct, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_struct_declaration(environment: Environment, node: _XdrStruct) -> None: @@ -247,6 +247,18 @@ def emit_struct_encoder(environment: Environment, node: _XdrStruct) -> None: print(template.render()) +def emit_struct_maxsize(environment: Environment, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "struct") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrStructGenerator(SourceGenerator): """Generate source code for XDR structs""" @@ -270,3 +282,7 @@ class XdrStructGenerator(SourceGenerator): def emit_encoder(self, node: _XdrStruct) -> None: """Emit one encoder function for an XDR struct type""" emit_struct_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + emit_struct_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/typedef.py b/tools/net/sunrpc/xdrgen/generators/typedef.py index 6ea98445f5c8..fab72e9d6915 100644 --- a/tools/net/sunrpc/xdrgen/generators/typedef.py +++ b/tools/net/sunrpc/xdrgen/generators/typedef.py @@ -12,7 +12,7 @@ from xdr_ast import _XdrBasic, _XdrTypedef, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrVoid, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> None: @@ -230,6 +230,18 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non raise NotImplementedError("typedef: type not recognized") +def emit_typedef_maxsize(environment: Environment, node: _XdrDeclaration) -> None: + """Emit a maxsize macro for an XDR typedef""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", node.template) + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrTypedefGenerator(SourceGenerator): """Generate source code for XDR typedefs""" @@ -253,3 +265,7 @@ class XdrTypedefGenerator(SourceGenerator): def emit_encoder(self, node: _XdrTypedef) -> None: """Emit one encoder function for an XDR typedef""" emit_typedef_encoder(self.environment, node.declaration) + + def emit_maxsize(self, node: _XdrTypedef) -> None: + """Emit one maxsize macro for an XDR typedef""" + emit_typedef_maxsize(self.environment, node.declaration) diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py index 4522a5b7a943..2cca00e279cd 100644 --- a/tools/net/sunrpc/xdrgen/generators/union.py +++ b/tools/net/sunrpc/xdrgen/generators/union.py @@ -8,8 +8,8 @@ from jinja2 import Environment from generators import SourceGenerator from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, big_endian -from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis +from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name +from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian def emit_union_declaration(environment: Environment, node: _XdrUnion) -> None: @@ -234,6 +234,18 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: print(template.render()) +def emit_union_maxsize(environment: Environment, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "union") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrUnionGenerator(SourceGenerator): """Generate source code for XDR unions""" @@ -257,3 +269,7 @@ class XdrUnionGenerator(SourceGenerator): def emit_encoder(self, node: _XdrUnion) -> None: """Emit one encoder function for an XDR union""" emit_union_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union""" + emit_union_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) From ac159338d53b8846b020be8260884e8234572a70 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Oct 2024 14:54:46 -0400 Subject: [PATCH 26/72] xdrgen: emit maxsize macros Add "definitions" subcommand logic to emit maxsize macros in generated code. Signed-off-by: Chuck Lever --- .../net/sunrpc/xdrgen/subcmds/definitions.py | 24 ++++++++++++++++--- tools/net/sunrpc/xdrgen/subcmds/source.py | 3 +-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/subcmds/definitions.py b/tools/net/sunrpc/xdrgen/subcmds/definitions.py index 5cd13d53221f..c956e27f37c0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/definitions.py +++ b/tools/net/sunrpc/xdrgen/subcmds/definitions.py @@ -28,9 +28,7 @@ from xdr_parse import xdr_parser, set_xdr_annotate logger.setLevel(logging.INFO) -def emit_header_definitions( - root: Specification, language: str, peer: str -) -> None: +def emit_header_definitions(root: Specification, language: str, peer: str) -> None: """Emit header definitions""" for definition in root.definitions: if isinstance(definition.value, _XdrConstant): @@ -52,6 +50,25 @@ def emit_header_definitions( gen.emit_definition(definition.value) +def emit_header_maxsize(root: Specification, language: str, peer: str) -> None: + """Emit header maxsize macros""" + print("") + for definition in root.definitions: + if isinstance(definition.value, _XdrEnum): + gen = XdrEnumGenerator(language, peer) + elif isinstance(definition.value, _XdrPointer): + gen = XdrPointerGenerator(language, peer) + elif isinstance(definition.value, _XdrTypedef): + gen = XdrTypedefGenerator(language, peer) + elif isinstance(definition.value, _XdrStruct): + gen = XdrStructGenerator(language, peer) + elif isinstance(definition.value, _XdrUnion): + gen = XdrUnionGenerator(language, peer) + else: + continue + gen.emit_maxsize(definition.value) + + def handle_parse_error(e: UnexpectedInput) -> bool: """Simple parse error reporting, no recovery attempted""" print(e) @@ -71,6 +88,7 @@ def subcmd(args: Namespace) -> int: gen.emit_definition(args.filename, ast) emit_header_definitions(ast, args.language, args.peer) + emit_header_maxsize(ast, args.language, args.peer) gen = XdrHeaderBottomGenerator(args.language, args.peer) gen.emit_definition(args.filename, ast) diff --git a/tools/net/sunrpc/xdrgen/subcmds/source.py b/tools/net/sunrpc/xdrgen/subcmds/source.py index 00c04ad15b89..2024954748f0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/source.py +++ b/tools/net/sunrpc/xdrgen/subcmds/source.py @@ -83,8 +83,7 @@ def generate_client_source(filename: str, root: Specification, language: str) -> gen = XdrSourceTopGenerator(language, "client") gen.emit_source(filename, root) - # cel: todo: client needs XDR size macros - + print("") for definition in root.definitions: emit_source_encoder(definition.value, language, "client") for definition in root.definitions: From f67eef8da0e8c54709fefdecd16ad8d70f0c9d20 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Nov 2024 11:01:13 -0500 Subject: [PATCH 27/72] nfsd: drop inode parameter from nfsd4_change_attribute() The inode that nfs4_open_delegation() passes to this function is wrong, which throws off the result. The inode will end up getting a directory-style change attr instead of a regular-file-style one. Fix up nfs4_delegation_stat() to fetch STATX_MODE, and then drop the inode parameter from nfsd4_change_attribute(), since it's no longer needed. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 5 ++--- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsfh.c | 20 ++++++++++++-------- fs/nfsd/nfsfh.h | 3 +-- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 551d2958ec29..d3cfc6471539 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5957,7 +5957,7 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, path.dentry = file_dentry(nf->nf_file); rc = vfs_getattr(&path, stat, - (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), + (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), AT_STATX_SYNC_AS_STAT); nfsd_file_put(nf); @@ -6041,8 +6041,7 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, } open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; dp->dl_cb_fattr.ncf_cur_fsize = stat.size; - dp->dl_cb_fattr.ncf_initial_cinfo = - nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry)); + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f118921250c3..8d25aef51ad1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3040,7 +3040,7 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, return nfs_ok; } - c = nfsd4_change_attribute(&args->stat, d_inode(args->dentry)); + c = nfsd4_change_attribute(&args->stat); return nfsd4_encode_changeid4(xdr, c); } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 40ad58a6a036..96e19c50a5d7 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -667,20 +667,18 @@ fh_update(struct svc_fh *fhp) __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode; struct kstat stat; __be32 err; if (fhp->fh_no_wcc || fhp->fh_pre_saved) return nfs_ok; - inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); if (err) return err; if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_change = nfsd4_change_attribute(&stat); fhp->fh_pre_mtime = stat.mtime; fhp->fh_pre_ctime = stat.ctime; @@ -697,7 +695,6 @@ __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) __be32 fh_fill_post_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; if (fhp->fh_no_wcc) @@ -713,7 +710,7 @@ __be32 fh_fill_post_attrs(struct svc_fh *fhp) fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); + nfsd4_change_attribute(&fhp->fh_post_attr); return nfs_ok; } @@ -804,7 +801,14 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) return FSIDSOURCE_DEV; } -/* +/** + * nfsd4_change_attribute - Generate an NFSv4 change_attribute value + * @stat: inode attributes + * + * Caller must fill in @stat before calling, typically by invoking + * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE. + * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2). + * * We could use i_version alone as the change attribute. However, i_version * can go backwards on a regular file after an unclean shutdown. On its own * that doesn't necessarily cause a problem, but if i_version goes backwards @@ -821,13 +825,13 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) * assume that the new change attr is always logged to stable storage in some * fashion before the results can be seen. */ -u64 nfsd4_change_attribute(const struct kstat *stat, const struct inode *inode) +u64 nfsd4_change_attribute(const struct kstat *stat) { u64 chattr; if (stat->result_mask & STATX_CHANGE_COOKIE) { chattr = stat->change_cookie; - if (S_ISREG(inode->i_mode) && + if (S_ISREG(stat->mode) && !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { chattr += (u64)stat->ctime.tv_sec << 30; chattr += stat->ctime.tv_nsec; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 5b7394801dc4..876152a91f12 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -297,8 +297,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) fhp->fh_pre_saved = false; } -u64 nfsd4_change_attribute(const struct kstat *stat, - const struct inode *inode); +u64 nfsd4_change_attribute(const struct kstat *stat); __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp); __be32 fh_fill_post_attrs(struct svc_fh *fhp); __be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp); From c757ca1a56edff8d288d4a6cfdbb305dd03e8048 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Oct 2024 09:16:44 -0400 Subject: [PATCH 28/72] nfsd: drop the ncf_cb_bmap field This is always the same value, and in a later patch we're going to need to set bits in WORD2. We can simplify this code and save a little space in the delegation too. Just hardcode the bitmap in the callback encode function. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 5 ++++- fs/nfsd/nfs4state.c | 1 - fs/nfsd/state.h | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 478b548f4147..f5ba9be91770 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -364,10 +364,13 @@ encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr); struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; + u32 bmap[1]; + + bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); encode_nfs_fh4(xdr, fh); - encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap)); + encode_bitmap4(xdr, bmap, ARRAY_SIZE(bmap)); hdr->nops++; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d3cfc6471539..f25d18398713 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1184,7 +1184,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); dp->dl_cb_fattr.ncf_file_modified = false; - dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 35b3564c065f..9721c6d6ec42 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -140,7 +140,6 @@ struct nfs4_cpntf_state { struct nfs4_cb_fattr { struct nfsd4_callback ncf_getattr; u32 ncf_cb_status; - u32 ncf_cb_bmap[1]; /* from CB_GETATTR reply */ u64 ncf_cb_change; From 3a405432e7cd84f5e137928ce383aa85a54fb3a6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Oct 2024 09:16:45 -0400 Subject: [PATCH 29/72] nfsd: drop the nfsd4_fattr_args "size" field We already have a slot for this in the kstat structure. Just overwrite that instead of keeping a copy. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8d25aef51ad1..7f1cb1cdb67e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2928,7 +2928,6 @@ struct nfsd4_fattr_args { struct kstat stat; struct kstatfs statfs; struct nfs4_acl *acl; - u64 size; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL void *context; int contextlen; @@ -3047,7 +3046,7 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { - return nfsd4_encode_uint64_t(xdr, args->size); + return nfsd4_encode_uint64_t(xdr, args->stat.size); } static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr, @@ -3555,7 +3554,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; } - args.size = 0; if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &file_modified, &size); @@ -3569,9 +3567,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (err) goto out_nfserr; if (file_modified) - args.size = size; - else - args.size = args.stat.size; + args.stat.size = size; if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ From f6259e2e4f64a1780b285037dbd4f947121ae8fd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 4 Oct 2024 09:16:46 -0400 Subject: [PATCH 30/72] nfsd: have nfsd4_deleg_getattr_conflict pass back write deleg pointer Currently we pass back the size and whether it has been modified, but those just mirror values tracked inside the delegation. In a later patch, we'll need to get at the timestamps in the delegation too, so just pass back a reference to the write delegation, and use that to properly override values in the iattr. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 17 ++++++++--------- fs/nfsd/nfs4xdr.c | 16 ++++++++++------ fs/nfsd/state.h | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f25d18398713..e0862173cd9e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8863,8 +8863,7 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @dentry: dentry of inode to be checked for a conflict - * @modified: return true if file was modified - * @size: new size of file if modified is true + * @pdp: returned WRITE delegation, if one was found * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server @@ -8874,11 +8873,12 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * 18.7.4. * * Returns 0 if there is no conflict; otherwise an nfs_stat - * code is returned. + * code is returned. If @pdp is set to a non-NULL value, then the + * caller must put the reference. */ __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, - bool *modified, u64 *size) + struct nfs4_delegation **pdp) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -8889,10 +8889,9 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_cb_fattr *ncf; struct inode *inode = d_inode(dentry); - *modified = false; ctx = locks_inode_context(inode); if (!ctx) - return 0; + return nfs_ok; #define NON_NFSD_LEASE ((void *)1) @@ -8958,10 +8957,10 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, goto out_status; } ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; - *size = ncf->ncf_cur_fsize; - *modified = true; + *pdp = dp; + return nfs_ok; } - status = 0; + status = nfs_ok; out_status: nfs4_put_stid(&dp->dl_stid); return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7f1cb1cdb67e..abbfd2b58c82 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3511,6 +3511,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, int ignore_crossmnt) { DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + struct nfs4_delegation *dp = NULL; struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; int starting_len = xdr->buf->len; @@ -3525,8 +3526,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, .dentry = dentry, }; unsigned long bit; - bool file_modified = false; - u64 size = 0; WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); @@ -3555,8 +3554,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, goto out; } if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { - status = nfsd4_deleg_getattr_conflict(rqstp, dentry, - &file_modified, &size); + status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp); if (status) goto out; } @@ -3564,10 +3562,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, err = vfs_getattr(&path, &args.stat, STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT); + if (dp) { + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + + if (ncf->ncf_file_modified) + args.stat.size = ncf->ncf_cur_fsize; + + nfs4_put_stid(&dp->dl_stid); + } if (err) goto out_nfserr; - if (file_modified) - args.stat.size = size; if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9721c6d6ec42..da22eaff3755 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -783,5 +783,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) } extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, - struct dentry *dentry, bool *file_modified, u64 *size); + struct dentry *dentry, struct nfs4_delegation **pdp); #endif /* NFSD4_STATE_H */ From b9376c7e42ca22644085332fd450b153cd4e9bde Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 18 Oct 2024 14:45:01 -0400 Subject: [PATCH 31/72] nfsd: new tracepoint for after op_func in compound processing Turn nfsd_compound_encode_err tracepoint into a class and add a new nfsd_compound_op_err tracepoint. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/trace.h | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d32f2dfd148f..93089f7064f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2780,6 +2780,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_get_currentstateid) op->opdesc->op_get_currentstateid(cstate, &op->u); op->status = op->opdesc->op_func(rqstp, cstate, &op->u); + trace_nfsd_compound_op_err(rqstp, op->opnum, op->status); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b8470d4cbe99..344803cf8004 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -163,7 +163,7 @@ TRACE_EVENT(nfsd_compound_decode_err, __entry->opnum, __entry->status) ); -TRACE_EVENT(nfsd_compound_encode_err, +DECLARE_EVENT_CLASS(nfsd_compound_err_class, TP_PROTO( const struct svc_rqst *rqstp, u32 opnum, @@ -184,6 +184,18 @@ TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define DEFINE_NFSD_COMPOUND_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_compound_err_class, nfsd_compound_##name##_err, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + u32 opnum, \ + __be32 status \ + ), \ + TP_ARGS(rqstp, opnum, status)) + +DEFINE_NFSD_COMPOUND_ERR_EVENT(op); +DEFINE_NFSD_COMPOUND_ERR_EVENT(encode); + #define show_fs_file_type(x) \ __print_symbolic(x, \ { S_IFLNK, "LNK" }, \ From 2dc84a75229c37e350dd1a83aaf4a63dc2ba86f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 13 Sep 2024 00:53:20 +0200 Subject: [PATCH 32/72] lockd: Fix comment about NLMv3 backwards compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NLMv2 is completely different protocol than NLMv1 and NLMv3, and in original Sun implementation is used for RPC loopback callbacks from statd to lockd services. Linux does not use nor does not implement NLMv2. Hence, NLMv3 is not backward compatible with NLMv2. But NLMv3 is backward compatible with NLMv1. Fix comment. Signed-off-by: Pali Rohár Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/clntxdr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index a3e97278b997..6ea3448d2d31 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -2,8 +2,9 @@ /* * linux/fs/lockd/clntxdr.c * - * XDR functions to encode/decode NLM version 3 RPC arguments and results. - * NLM version 3 is backwards compatible with NLM versions 1 and 2. + * XDR functions to encode/decode NLM version 1 and 3 RPC + * arguments and results. NLM version 2 is not specified + * by a standard, thus it is not implemented. * * NLM client-side only. * From 600020927b004f027e737e6bf57c450d48f2405e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 5 Oct 2024 20:33:49 +0200 Subject: [PATCH 33/72] nfsd: Fill NFSv4.1 server implementation fields in OP_EXCHANGE_ID response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NFSv4.1 OP_EXCHANGE_ID response from server may contain server implementation details (domain, name and build time) in optional nfs_impl_id4 field. Currently nfsd does not fill this field. Send these information in NFSv4.1 OP_EXCHANGE_ID response. Fill them with the same values as what is Linux NFSv4.1 client doing. Domain is hardcoded to "kernel.org", name is composed in the same way as "uname -srvm" output and build time is hardcoded to zeros. NFSv4.1 client and server implementation fields are useful for statistic purposes or for identifying type of clients and servers. Signed-off-by: Pali Rohár Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/nfs4state.c | 31 +++++++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 24 +++++++++++++++++++++++- fs/nfsd/xdr4.h | 2 ++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 93089f7064f0..49b4cbfe914d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -3453,6 +3453,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { /* NFSv4.1 operations */ [OP_EXCHANGE_ID] = { .op_func = nfsd4_exchange_id, + .op_release = nfsd4_exchange_id_release, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP | OP_MODIFIES_SOMETHING, .op_name = "OP_EXCHANGE_ID", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e0862173cd9e..2f229d007b58 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3524,6 +3524,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); + exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + if (!exid->server_impl_name) + return nfserr_jukebox; + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; @@ -3661,6 +3667,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, exid->seqid = conf->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(conf, exid); + exid->nii_domain.len = sizeof("kernel.org") - 1; + exid->nii_domain.data = "kernel.org"; + + /* + * Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes. + */ + exid->nii_name.len = strlen(exid->server_impl_name); + if (exid->nii_name.len > NFS4_OPAQUE_LIMIT) + exid->nii_name.len = NFS4_OPAQUE_LIMIT; + exid->nii_name.data = exid->server_impl_name; + + /* just send zeros - the date is in nii_name */ + exid->nii_time.tv_sec = 0; + exid->nii_time.tv_nsec = 0; + dprintk("nfsd4_exchange_id seqid %d flags %x\n", conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; @@ -3677,6 +3700,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } +void +nfsd4_exchange_id_release(union nfsd4_op_u *u) +{ + struct nfsd4_exchange_id *exid = &u->exchange_id; + + kfree(exid->server_impl_name); +} + static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) { /* The slot is in use, and no response has been sent. */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index abbfd2b58c82..37f8301cfb8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4825,6 +4825,25 @@ nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp) return nfsd4_encode_opaque(xdr, nn->nfsd_name, strlen(nn->nfsd_name)); } +static __be32 +nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid) +{ + __be32 status; + + /* nii_domain */ + status = nfsd4_encode_opaque(xdr, exid->nii_domain.data, + exid->nii_domain.len); + if (status != nfs_ok) + return status; + /* nii_name */ + status = nfsd4_encode_opaque(xdr, exid->nii_name.data, + exid->nii_name.len); + if (status != nfs_ok) + return status; + /* nii_time */ + return nfsd4_encode_nfstime4(xdr, &exid->nii_time); +} + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) @@ -4859,8 +4878,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* eir_server_impl_id<1> */ - if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) return nfserr_resource; + nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid); + if (nfserr != nfs_ok) + return nfserr; return nfs_ok; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2a21a7662e03..d5bb9a3c2da4 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -567,6 +567,7 @@ struct nfsd4_exchange_id { struct xdr_netobj nii_domain; struct xdr_netobj nii_name; struct timespec64 nii_time; + char *server_impl_name; }; struct nfsd4_sequence { @@ -930,6 +931,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); +void nfsd4_exchange_id_release(union nfsd4_op_u *u); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, From bb4f07f2409c26c01e97e6f9b432545f353e3b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 5 Oct 2024 18:40:39 +0200 Subject: [PATCH 34/72] nfsd: Fix NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT do not bypass only GSS, but bypass any method. This is a problem specially for NFS3 AUTH_NULL-only exports. The purpose of NFSD_MAY_BYPASS_GSS_ON_ROOT is described in RFC 2623, section 2.3.2, to allow mounting NFS2/3 GSS-only export without authentication. So few procedures which do not expose security risk used during mount time can be called also with AUTH_NONE or AUTH_SYS, to allow client mount operation to finish successfully. The problem with current implementation is that for AUTH_NULL-only exports, the NFSD_MAY_BYPASS_GSS_ON_ROOT is active also for NFS3 AUTH_UNIX mount attempts which confuse NFS3 clients, and make them think that AUTH_UNIX is enabled and is working. Linux NFS3 client never switches from AUTH_UNIX to AUTH_NONE on active mount, which makes the mount inaccessible. Fix the NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT implementation and really allow to bypass only exports which have enabled some real authentication (GSS, TLS, or any other). The result would be: For AUTH_NULL-only export if client attempts to do mount with AUTH_UNIX flavor then it will receive access errors, which instruct client that AUTH_UNIX flavor is not usable and will either try other auth flavor (AUTH_NULL if enabled) or fails mount procedure. Similarly if client attempt to do mount with AUTH_NULL flavor and only AUTH_UNIX flavor is enabled then the client will receive access error. This should fix problems with AUTH_NULL-only or AUTH_UNIX-only exports if client attempts to mount it with other auth flavor (e.g. with AUTH_NULL for AUTH_UNIX-only export, or with AUTH_UNIX for AUTH_NULL-only export). Signed-off-by: Pali Rohár Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 21 ++++++++++++++++++++- fs/nfsd/export.h | 3 ++- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsfh.c | 9 ++++++--- fs/nfsd/vfs.c | 2 +- 6 files changed, 31 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c82d8e3e0d4f..0f40003d864f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1078,12 +1078,14 @@ static struct svc_export *exp_find(struct cache_detail *cd, * check_nfsd_access - check if access to export is allowed. * @exp: svc_export that is being accessed. * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO). + * @may_bypass_gss: reduce strictness of authorization check * * Return values: * %nfs_ok if access is granted, or * %nfserr_wrongsec if access is denied */ -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss) { struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; struct svc_xprt *xprt; @@ -1140,6 +1142,23 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) if (nfsd4_spo_must_allow(rqstp)) return nfs_ok; + /* Some calls may be processed without authentication + * on GSS exports. For example NFS2/3 calls on root + * directory, see section 2.3.2 of rfc 2623. + * For "may_bypass_gss" check that export has really + * enabled some flavor with authentication (GSS or any + * other) and also check that the used auth flavor is + * without authentication (none or sys). + */ + if (may_bypass_gss && ( + rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || + rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) { + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor >= RPC_AUTH_DES) + return 0; + } + } + denied: return nfserr_wrongsec; } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 3794ae253a70..4d92b99c1ffd 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -101,7 +101,8 @@ struct svc_expkey { struct svc_cred; int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp); -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss); /* * Function declarations diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 49b4cbfe914d..94cc7b2ac385 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2797,7 +2797,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (current_fh->fh_export && need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(current_fh->fh_export, rqstp); + op->status = check_nfsd_access(current_fh->fh_export, rqstp, false); } encode_op: if (op->status == nfserr_replay_me) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 37f8301cfb8a..5e6ef24d5450 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3767,7 +3767,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, nfserr = nfserrno(err); goto out_put; } - nfserr = check_nfsd_access(exp, cd->rd_rqstp); + nfserr = check_nfsd_access(exp, cd->rd_rqstp, false); if (nfserr) goto out_put; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 96e19c50a5d7..cbb046f88eec 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -320,6 +320,7 @@ __fh_verify(struct svc_rqst *rqstp, { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *exp = NULL; + bool may_bypass_gss = false; struct dentry *dentry; __be32 error; @@ -367,8 +368,10 @@ __fh_verify(struct svc_rqst *rqstp, * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) + if (access & NFSD_MAY_LOCK) goto skip_pseudoflavor_check; + if (access & NFSD_MAY_BYPASS_GSS) + may_bypass_gss = true; /* * Clients may expect to be able to use auth_sys during mount, * even if they use gss for everything else; see section 2.3.2 @@ -376,9 +379,9 @@ __fh_verify(struct svc_rqst *rqstp, */ if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT && exp->ex_path.dentry == dentry) - goto skip_pseudoflavor_check; + may_bypass_gss = true; - error = check_nfsd_access(exp, rqstp); + error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e0692401da33..1f8540148c4a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -321,7 +321,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; - err = check_nfsd_access(exp, rqstp); + err = check_nfsd_access(exp, rqstp, false); if (err) goto out; /* From a32442f6ca32cf402a76856d5e713bd742481ba2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 7 Oct 2024 14:07:54 -0400 Subject: [PATCH 35/72] xdrgen: Add a utility for extracting XDR from RFCs For convenience, copy the XDR extraction script from RFC Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- MAINTAINERS | 1 + tools/net/sunrpc/extract.sh | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100755 tools/net/sunrpc/extract.sh diff --git a/MAINTAINERS b/MAINTAINERS index 21fdaa19229a..f711449753b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12367,6 +12367,7 @@ F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ +F: tools/net/sunrpc/ KERNEL PACMAN PACKAGING (in addition to generic KERNEL BUILD) M: Thomas Weißschuh diff --git a/tools/net/sunrpc/extract.sh b/tools/net/sunrpc/extract.sh new file mode 100755 index 000000000000..f944066f25bc --- /dev/null +++ b/tools/net/sunrpc/extract.sh @@ -0,0 +1,11 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Extract an RPC protocol specification from an RFC document. +# The version of this script comes from RFC 8166. +# +# Usage: +# $ extract.sh < rfcNNNN.txt > protocol.x +# + +grep '^ *///' | sed 's?^ */// ??' | sed 's?^ *///$??' From ed9887b876c957c9c9a0486cf0edf7c964e99cb9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Oct 2024 22:16:59 +0200 Subject: [PATCH 36/72] nfsd: replace call_rcu by kfree_rcu for simple kmem_cache_free callback Since SLOB was removed and since commit 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()"), it is not necessary to use call_rcu when the callback only performs kmem_cache_free. Use kfree_rcu() directly. The changes were made using Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Jeff Layton Acked-by: Paul E. McKenney Acked-by: Vlastimil Babka Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2f229d007b58..291db55c90e4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -572,13 +572,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static void nfsd4_free_file_rcu(struct rcu_head *rcu) -{ - struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu); - - kmem_cache_free(file_slab, fp); -} - void put_nfs4_file(struct nfs4_file *fi) { @@ -586,7 +579,7 @@ put_nfs4_file(struct nfs4_file *fi) nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); - call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); + kfree_rcu(fi, fi_rcu); } } From 6640556b0c80edc66d6f50abe53f00311a873536 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 13 Oct 2024 16:02:28 -0400 Subject: [PATCH 37/72] NFSD: Replace use of NFSD_MAY_LOCK in nfsd4_lock() NFSv4 LOCK operations should not avoid the set of authorization checks that apply to all other NFSv4 operations. Also, the "no_auth_nlm" export option should apply only to NLM LOCK requests. It's not necessary or sensible to apply it to NFSv4 LOCK operations. Instead, set no permission bits when calling fh_verify(). Subsequent stateid processing handles authorization checks. Reported-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 291db55c90e4..2efceeea0fe3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7965,11 +7965,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; - if ((status = fh_verify(rqstp, &cstate->current_fh, - S_IFREG, NFSD_MAY_LOCK))) { - dprintk("NFSD: nfsd4_lock: permission denied!\n"); + status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); + if (status != nfs_ok) return status; - } sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { From 4cc9b9f2bf4dfe13fe573da978e626e2248df388 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Oct 2024 08:42:31 +1100 Subject: [PATCH 38/72] nfsd: refine and rename NFSD_MAY_LOCK NFSD_MAY_LOCK means a few different things. - it means that GSS is not required. - it means that with NFSEXP_NOAUTHNLM, authentication is not required - it means that OWNER_OVERRIDE is allowed. None of these are specific to locking, they are specific to the NLM protocol. So: - rename to NFSD_MAY_NLM - set NFSD_MAY_OWNER_OVERRIDE and NFSD_MAY_BYPASS_GSS in nlm_fopen() so that NFSD_MAY_NLM doesn't need to imply these. - move the test on NFSEXP_NOAUTHNLM out of nfsd_permission() and into fh_verify where other special-case tests on the MAY flags happen. nfsd_permission() can be called from other places than fh_verify(), but none of these will have NFSD_MAY_NLM. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/lockd.c | 13 +++++++++++-- fs/nfsd/nfsfh.c | 12 ++++-------- fs/nfsd/trace.h | 2 +- fs/nfsd/vfs.c | 12 +----------- fs/nfsd/vfs.h | 2 +- 5 files changed, 18 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 46a7f9b813e5..edc9f75dc75c 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -38,11 +38,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; + /* + * Allow BYPASS_GSS as some client implementations use AUTH_SYS + * for NLM even when GSS is used for NFS. + * Allow OWNER_OVERRIDE as permission might have been changed + * after the file was opened. + * Pass MAY_NLM so that authentication can be completely bypassed + * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL + * for NLM requests. + */ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; - access |= NFSD_MAY_LOCK; + access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS; nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp); fh_put(&fh); - /* We return nlm error codes as nlm doesn't know + /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. */ switch (nfserr) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cbb046f88eec..871de925a3df 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -363,13 +363,10 @@ __fh_verify(struct svc_rqst *rqstp, if (error) goto out; - /* - * pseudoflavor restrictions are not enforced on NLM, - * which clients virtually always use auth_sys for, - * even while using RPCSEC_GSS for NFS. - */ - if (access & NFSD_MAY_LOCK) - goto skip_pseudoflavor_check; + if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM)) + /* NLM is allowed to fully bypass authentication */ + goto out; + if (access & NFSD_MAY_BYPASS_GSS) may_bypass_gss = true; /* @@ -385,7 +382,6 @@ __fh_verify(struct svc_rqst *rqstp, if (error) goto out; -skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(cred, exp, dentry, access); out: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 344803cf8004..f318898cfc31 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -79,7 +79,7 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); { NFSD_MAY_READ, "READ" }, \ { NFSD_MAY_SATTR, "SATTR" }, \ { NFSD_MAY_TRUNC, "TRUNC" }, \ - { NFSD_MAY_LOCK, "LOCK" }, \ + { NFSD_MAY_NLM, "NLM" }, \ { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \ { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \ { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1f8540148c4a..eee39bab49a8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2506,7 +2506,7 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp, (acc & NFSD_MAY_EXEC)? " exec" : "", (acc & NFSD_MAY_SATTR)? " sattr" : "", (acc & NFSD_MAY_TRUNC)? " trunc" : "", - (acc & NFSD_MAY_LOCK)? " lock" : "", + (acc & NFSD_MAY_NLM)? " nlm" : "", (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "", inode->i_mode, IS_IMMUTABLE(inode)? " immut" : "", @@ -2531,16 +2531,6 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp, if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) return nfserr_perm; - if (acc & NFSD_MAY_LOCK) { - /* If we cannot rely on authentication in NLM requests, - * just allow locks, otherwise require read permission, or - * ownership - */ - if (exp->ex_flags & NFSEXP_NOAUTHNLM) - return 0; - else - acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; - } /* * The file owner always gets access permission for accesses that * would normally be checked at open time. This is to make diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 854fb95dfdca..f9b09b842856 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -20,7 +20,7 @@ #define NFSD_MAY_READ 0x004 /* == MAY_READ */ #define NFSD_MAY_SATTR 0x008 #define NFSD_MAY_TRUNC 0x010 -#define NFSD_MAY_LOCK 0x020 +#define NFSD_MAY_NLM 0x020 /* request is from lockd */ #define NFSD_MAY_MASK 0x03f /* extra hints to permission and open routines: */ From d08bf5ea649c045175c191609ccd52644b85985f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:51 -0400 Subject: [PATCH 39/72] NFSD: Remove dead code in nfsd4_create_session() Clean up. AFAICT, there is no way to reach the out_free_conn label with @old set to a non-NULL value, so the expire_client(old) call is never reached and can be removed. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2efceeea0fe3..133117b6c7cc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3954,7 +3954,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, return status; out_expired_error: - old = NULL; /* * Revert the slot seq_nr change so the server will process * the client's resend instead of returning a cached response. @@ -3969,8 +3968,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, out_free_conn: spin_unlock(&nn->client_lock); free_conn(conn); - if (old) - expire_client(old); out_free_session: __free_session(new); out_release_drc_mem: From da4f777e623936d4d93427b69fca37baefd62669 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:52 -0400 Subject: [PATCH 40/72] NFSD: Remove a never-true comparison fh_size is an unsigned int, thus it can never be less than 0. Fixes: d8b26071e65e ("NFSD: simplify struct nfsfh") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 871de925a3df..6a831cb242df 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -766,7 +766,7 @@ char * SVCFH_fmt(struct svc_fh *fhp) struct knfsd_fh *fh = &fhp->fh_handle; static char buf[2+1+1+64*3+1]; - if (fh->fh_size < 0 || fh->fh_size> 64) + if (fh->fh_size > 64) return "bad-fh"; sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; From 1e02c641c3a43c88cecc08402000418e15578d38 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:53 -0400 Subject: [PATCH 41/72] NFSD: Prevent NULL dereference in nfsd4_process_cb_update() @ses is initialized to NULL. If __nfsd4_find_backchannel() finds no available backchannel session, setup_callback_client() will try to dereference @ses and segfault. Fixes: dcbeaa68dbbd ("nfsd4: allow backchannel recovery") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f5ba9be91770..1ffac2b32d81 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1464,6 +1464,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) ses = c->cn_session; } spin_unlock(&clp->cl_lock); + if (!c) + return; err = setup_callback_client(clp, &conn, ses); if (err) { From 6b9c1080a69ee4d37e88f3743c13a45cce6afcbf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:54 -0400 Subject: [PATCH 42/72] NFSD: Remove unused results in nfsd4_encode_pathname4() Clean up. The result of "*p++" is saved, but is not used before it is overwritten. The result of xdr_encode_opaque() is saved each time through the loop but is never used. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5e6ef24d5450..a3a0d27e51e6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2699,7 +2699,6 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr, const struct path *path) { struct path cur = *path; - __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -2730,24 +2729,19 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } - err = nfserr_resource; - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_free; - *p++ = cpu_to_be32(ncomponents); + err = nfserr_resource; + if (xdr_stream_encode_u32(xdr, ncomponents) != XDR_UNIT) + goto out_free; while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; - unsigned int len; spin_lock(&dentry->d_lock); - len = dentry->d_name.len; - p = xdr_reserve_space(xdr, len + 4); - if (!p) { + if (xdr_stream_encode_opaque(xdr, dentry->d_name.name, + dentry->d_name.len) < 0) { spin_unlock(&dentry->d_lock); goto out_free; } - p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%pd", dentry); spin_unlock(&dentry->d_lock); dput(dentry); From 30c1d2411acd6d9d987f5f804aa173abb3b097ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:55 -0400 Subject: [PATCH 43/72] NFSD: Remove unused values from nfsd4_encode_components_esc() Clean up. The computed value of @p is saved each time through the loop but is never used. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a3a0d27e51e6..53fac037611c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2652,13 +2652,10 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, strlen = end - str; if (strlen) { - p = xdr_reserve_space(xdr, strlen + 4); - if (!p) + if (xdr_stream_encode_opaque(xdr, str, strlen) < 0) return nfserr_resource; - p = xdr_encode_opaque(p, str, strlen); count++; - } - else + } else end++; if (found_esc) end = next; From f64ea4af43161bb86ffc77e6aeb5bcf5c3229df0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 11:03:56 -0400 Subject: [PATCH 44/72] NFSD: Cap the number of bytes copied by nfs4_reset_recoverydir() It's only current caller already length-checks the string, but let's be safe. Fixes: 0964a3d3f1aa ("[PATCH] knfsd: nfsd4 reboot dirname fix") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4recover.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index b7d61eb8afe9..4a765555bf84 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -659,7 +659,8 @@ nfs4_reset_recoverydir(char *recdir) return status; status = -ENOTDIR; if (d_is_dir(path.dentry)) { - strcpy(user_recovery_dirname, recdir); + strscpy(user_recovery_dirname, recdir, + sizeof(user_recovery_dirname)); status = 0; } path_put(&path); From 2f746e40e9baae878f8193e09d8f6d601dce42bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:27 -0400 Subject: [PATCH 45/72] lockd: Remove unused typedef Clean up: Looks like the last usage of this typedef was removed by commit 026fec7e7c47 ("sunrpc: properly type pc_decode callbacks") in 2017. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/lockd/xdr.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 80cca9426761..17d53165d9f2 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -73,8 +73,6 @@ struct nlm_args { u32 fsm_mode; }; -typedef struct nlm_args nlm_args; - /* * Generic lockd result */ From e5948841285b1ad5bc3234a2f6d0586eceabfbdc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:28 -0400 Subject: [PATCH 46/72] lockd: Remove unnecessary memset() Since commit 103cc1fafee4 ("SUNRPC: Parametrize how much of argsize should be zeroed") (and possibly long before that, even) all of the memory underlying rqstp->rq_argp is zeroed already. There's no need for the memset() in nlm4svc_decode_shareargs(). Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/xdr4.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 3d28b9c3ed15..60466b8bac58 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -268,7 +268,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; - memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32)0; From a872c7313ec55263e11026163f081069a8c896c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:29 -0400 Subject: [PATCH 47/72] lockd: Remove some snippets of unfinished code Clean up. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 12 ------------ fs/lockd/svcproc.c | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 8a72c418cdcc..1d0400d94b3d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -142,18 +142,6 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; -#if 0 - /* If supplied state doesn't match current state, we assume it's - * an old request that time-warped somehow. Any error return would - * do in this case because it's irrelevant anyway. - * - * NB: We don't retrieve the remote host's state yet. - */ - if (host->h_nsmstate && host->h_nsmstate != argp->state) { - resp->status = nlm_lck_denied_nolocks; - } else -#endif - /* Now try to lock the file */ resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie, diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index a03220e66ce0..d959a5dbe0ae 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -165,18 +165,6 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; -#if 0 - /* If supplied state doesn't match current state, we assume it's - * an old request that time-warped somehow. Any error return would - * do in this case because it's irrelevant anyway. - * - * NB: We don't retrieve the remote host's state yet. - */ - if (host->h_nsmstate && host->h_nsmstate != argp->state) { - resp->status = nlm_lck_denied_nolocks; - } else -#endif - /* Now try to lock the file */ resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie, From 8994a512e2598c0bf1b6e9ece1e8292976b0dd65 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:30 -0400 Subject: [PATCH 48/72] lockd: Remove unused parameter to nlmsvc_testlock() The nlm_cookie parameter has been unused since commit 09802fd2a8ca ("lockd: rip out deferred lock handling from testlock codepath"). Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 3 ++- fs/lockd/svclock.c | 2 +- fs/lockd/svcproc.c | 3 ++- include/linux/lockd/lockd.h | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1d0400d94b3d..2cb603013111 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -108,7 +108,8 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) test_owner = argp->lock.fl.c.flc_owner; /* Now check for conflicting locks */ - resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); + resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, + &resp->lock); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 1f2149db10f2..33e1fd159934 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -609,7 +609,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, - struct nlm_lock *conflock, struct nlm_cookie *cookie) + struct nlm_lock *conflock) { int error; int mode; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d959a5dbe0ae..f53d5177f267 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -130,7 +130,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) test_owner = argp->lock.fl.c.flc_owner; /* Now check for conflicting locks */ - resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); + resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, + &argp->lock, &resp->lock)); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 61c4b9c41904..c8f0f9458f2c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -278,9 +278,9 @@ __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, struct nlm_cookie *, int); __be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); -__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, - struct nlm_host *, struct nlm_lock *, - struct nlm_lock *, struct nlm_cookie *); +__be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, From 9189d23b835cec646ba5010db35d1557a77c5857 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2024 09:36:31 -0400 Subject: [PATCH 49/72] lockd: Remove unneeded initialization of file_lock::c.flc_flags Since commit 75c7940d2a86 ("lockd: set missing fl_flags field when retrieving args"), nlmsvc_retrieve_args() initializes the flc_flags field. svcxdr_decode_lock() no longer needs to do this. This clean up removes one dependency on the nlm_lock:fl field. No behavior change is expected. Analysis: svcxdr_decode_lock() is called by: nlm4svc_decode_testargs() nlm4svc_decode_lockargs() nlm4svc_decode_cancargs() nlm4svc_decode_unlockargs() nlm4svc_decode_testargs() is used by: - NLMPROC4_TEST and NLMPROC4_TEST_MSG, which call nlmsvc_retrieve_args() - NLMPROC4_GRANTED and NLMPROC4_GRANTED_MSG, which don't pass the lock's file_lock to the generic lock API nlm4svc_decode_lockargs() is used by: - NLMPROC4_LOCK and NLM4PROC4_LOCK_MSG, which call nlmsvc_retrieve_args() - NLMPROC4_UNLOCK and NLM4PROC4_UNLOCK_MSG, which call nlmsvc_retrieve_args() - NLMPROC4_NM_LOCK, which calls nlmsvc_retrieve_args() nlm4svc_decode_cancargs() is used by: - NLMPROC4_CANCEL and NLMPROC4_CANCEL_MSG, which call nlmsvc_retrieve_args() nlm4svc_decode_unlockargs() is used by: - NLMPROC4_UNLOCK and NLMPROC4_UNLOCK_MSG, which call nlmsvc_retrieve_args() All callers except GRANTED/GRANTED_MSG eventually call nlmsvc_retrieve_args() before using nlm_lock::fl.c.flc_flags. Thus this change is safe. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 5 +++-- fs/lockd/xdr4.c | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 2cb603013111..109e5caae8c7 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -46,14 +46,15 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (filp != NULL) { int mode = lock_to_openmode(&lock->fl); + lock->fl.c.flc_flags = FL_POSIX; + error = nlm_lookup_file(rqstp, &file, lock); if (error) goto no_locks; *filp = file; /* Set up the missing parts of the file_lock structure */ - lock->fl.c.flc_flags = FL_POSIX; - lock->fl.c.flc_file = file->f_file[mode]; + lock->fl.c.flc_file = file->f_file[mode]; lock->fl.c.flc_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; lock->fl.fl_end = lock->lock_len ? diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 60466b8bac58..e343c820301f 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -89,7 +89,6 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) return false; locks_init_lock(fl); - fl->c.flc_flags = FL_POSIX; fl->c.flc_type = F_RDLCK; nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); return true; From be8f982c369c965faffa198b46060f8853e0f1f0 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Mon, 21 Oct 2024 22:23:41 +0800 Subject: [PATCH 50/72] nfsd: make sure exp active before svc_export_show The function `e_show` was called with protection from RCU. This only ensures that `exp` will not be freed. Therefore, the reference count for `exp` can drop to zero, which will trigger a refcount use-after-free warning when `exp_get` is called. To resolve this issue, use `cache_get_rcu` to ensure that `exp` remains active. ------------[ cut here ]------------ refcount_t: addition on 0; use-after-free. WARNING: CPU: 3 PID: 819 at lib/refcount.c:25 refcount_warn_saturate+0xb1/0x120 CPU: 3 UID: 0 PID: 819 Comm: cat Not tainted 6.12.0-rc3+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 RIP: 0010:refcount_warn_saturate+0xb1/0x120 ... Call Trace: e_show+0x20b/0x230 [nfsd] seq_read_iter+0x589/0x770 seq_read+0x1e5/0x270 vfs_read+0x125/0x530 ksys_read+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: bf18f163e89c ("NFSD: Using exp_get for export getting") Cc: stable@vger.kernel.org # 4.20+ Signed-off-by: Yang Erkun Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 0f40003d864f..aa4712362b3b 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1425,9 +1425,12 @@ static int e_show(struct seq_file *m, void *p) return 0; } - exp_get(exp); + if (!cache_get_rcu(&exp->h)) + return 0; + if (cache_check(cd, &exp->h, NULL)) return 0; + exp_put(exp); return svc_export_show(m, cd, cp); } From 2862eee078a4d2d1f584e7f24fa50dddfa5f3471 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Mon, 21 Oct 2024 22:23:42 +0800 Subject: [PATCH 51/72] SUNRPC: make sure cache entry active before cache_show The function `c_show` was called with protection from RCU. This only ensures that `cp` will not be freed. Therefore, the reference count for `cp` can drop to zero, which will trigger a refcount use-after-free warning when `cache_get` is called. To resolve this issue, use `cache_get_rcu` to ensure that `cp` remains active. ------------[ cut here ]------------ refcount_t: addition on 0; use-after-free. WARNING: CPU: 7 PID: 822 at lib/refcount.c:25 refcount_warn_saturate+0xb1/0x120 CPU: 7 UID: 0 PID: 822 Comm: cat Not tainted 6.12.0-rc3+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 RIP: 0010:refcount_warn_saturate+0xb1/0x120 Call Trace: c_show+0x2fc/0x380 [sunrpc] seq_read_iter+0x589/0x770 seq_read+0x1e5/0x270 proc_reg_read+0xe1/0x140 vfs_read+0x125/0x530 ksys_read+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Yang Erkun Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1bd3e531b0e0..059f6ef1ad18 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1427,7 +1427,9 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); - cache_get(cp); + if (!cache_get_rcu(cp)) + return 0; + if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_puts(m, "# "); From f8c989a0c89a75d30f899a7cabdc14d72522bb8d Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Mon, 21 Oct 2024 22:23:43 +0800 Subject: [PATCH 52/72] nfsd: release svc_expkey/svc_export with rcu_work The last reference for `cache_head` can be reduced to zero in `c_show` and `e_show`(using `rcu_read_lock` and `rcu_read_unlock`). Consequently, `svc_export_put` and `expkey_put` will be invoked, leading to two issues: 1. The `svc_export_put` will directly free ex_uuid. However, `e_show`/`c_show` will access `ex_uuid` after `cache_put`, which can trigger a use-after-free issue, shown below. ================================================================== BUG: KASAN: slab-use-after-free in svc_export_show+0x362/0x430 [nfsd] Read of size 1 at addr ff11000010fdc120 by task cat/870 CPU: 1 UID: 0 PID: 870 Comm: cat Not tainted 6.12.0-rc3+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 print_address_description.constprop.0+0x2c/0x3a0 print_report+0xb9/0x280 kasan_report+0xae/0xe0 svc_export_show+0x362/0x430 [nfsd] c_show+0x161/0x390 [sunrpc] seq_read_iter+0x589/0x770 seq_read+0x1e5/0x270 proc_reg_read+0xe1/0x140 vfs_read+0x125/0x530 ksys_read+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Allocated by task 830: kasan_save_stack+0x20/0x40 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc_node_track_caller_noprof+0x1bc/0x400 kmemdup_noprof+0x22/0x50 svc_export_parse+0x8a9/0xb80 [nfsd] cache_do_downcall+0x71/0xa0 [sunrpc] cache_write_procfs+0x8e/0xd0 [sunrpc] proc_reg_write+0xe1/0x140 vfs_write+0x1a5/0x6d0 ksys_write+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Freed by task 868: kasan_save_stack+0x20/0x40 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x37/0x50 kfree+0xf3/0x3e0 svc_export_put+0x87/0xb0 [nfsd] cache_purge+0x17f/0x1f0 [sunrpc] nfsd_destroy_serv+0x226/0x2d0 [nfsd] nfsd_svc+0x125/0x1e0 [nfsd] write_threads+0x16a/0x2a0 [nfsd] nfsctl_transaction_write+0x74/0xa0 [nfsd] vfs_write+0x1a5/0x6d0 ksys_write+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e 2. We cannot sleep while using `rcu_read_lock`/`rcu_read_unlock`. However, `svc_export_put`/`expkey_put` will call path_put, which subsequently triggers a sleeping operation due to the following `dput`. ============================= WARNING: suspicious RCU usage 5.10.0-dirty #141 Not tainted ----------------------------- ... Call Trace: dump_stack+0x9a/0xd0 ___might_sleep+0x231/0x240 dput+0x39/0x600 path_put+0x1b/0x30 svc_export_put+0x17/0x80 e_show+0x1c9/0x200 seq_read_iter+0x63f/0x7c0 seq_read+0x226/0x2d0 vfs_read+0x113/0x2c0 ksys_read+0xc9/0x170 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x67/0xd1 Fix these issues by using `rcu_work` to help release `svc_expkey`/`svc_export`. This approach allows for an asynchronous context to invoke `path_put` and also facilitates the freeing of `uuid/exp/key` after an RCU grace period. Fixes: 9ceddd9da134 ("knfsd: Allow lockless lookups of the exports") Signed-off-by: Yang Erkun Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 31 +++++++++++++++++++++++++------ fs/nfsd/export.h | 4 ++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index aa4712362b3b..eacafe46e3b6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -40,15 +40,24 @@ #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put(struct kref *ref) +static void expkey_put_work(struct work_struct *work) { - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + struct svc_expkey *key = + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree_rcu(key, ek_rcu); + kfree(key); +} + +static void expkey_put(struct kref *ref) +{ + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); + queue_rcu_work(system_wq, &key->ek_rcu_work); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -355,16 +364,26 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_put(struct kref *ref) +static void svc_export_put_work(struct work_struct *work) { - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + struct svc_export *exp = + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); + path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree_rcu(exp, ex_rcu); + kfree(exp); +} + +static void svc_export_put(struct kref *ref) +{ + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); + queue_rcu_work(system_wq, &exp->ex_rcu_work); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 4d92b99c1ffd..6f2fbaae01fa 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -75,7 +75,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_head ex_rcu; + struct rcu_work ex_rcu_work; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,7 +92,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_head ek_rcu; + struct rcu_work ek_rcu_work; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) From 07decac0ac6282672af182521ef0b4b61605c4b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Oct 2024 13:43:54 -0400 Subject: [PATCH 53/72] xdrgen: Remove tracepoint call site This tracepoint was a "note to self" and is not operational. It is added only to client-side code, which so far we haven't needed. It will cause immediate breakage once we start generating client code, though, so remove it now. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index d304eccb5c40..38c31b3f0589 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,10 +13,8 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat != nfs_ok) { - trace_nfs_xdr_status(xdr, (int)result->stat); + if (result->stat != nfs_ok) return {{ program }}_stat_to_errno(result->stat); - } {% endif %} return 0; } From 82c2a36179d9bd00b792f4215cc4f71ca2d4c3a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Oct 2024 13:44:01 -0400 Subject: [PATCH 54/72] xdrgen: Remove check for "nfs_ok" in C templates Obviously, "nfs_ok" is defined only for NFS protocols. Other XDR protocols won't know "nfs_ok" from Adam. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index 38c31b3f0589..4ce4cc9fab79 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,7 +13,7 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat != nfs_ok) + if (result->stat) return {{ program }}_stat_to_errno(result->stat); {% endif %} return 0; From 903a7d37d9ea03cfed21040467d3d345d1e6fc76 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Oct 2024 13:44:08 -0400 Subject: [PATCH 55/72] xdrgen: Update the files included in client-side source code In particular, client-side source code needs the definition of "struct rpc_procinfo" and does not want header files that pull in "struct svc_rqst". Otherwise, the source does not compile. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 index e3a802cbc4d7..c5518c519854 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 @@ -3,6 +3,11 @@ // XDR specification file: {{ filename }} // XDR specification modification time: {{ mtime }} -#include +#include -#include "{{ program }}xdr_gen.h" +#include +#include +#include +#include + +#include From 573954a996c0056b25eda4638edfee8c010e27f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Oct 2024 13:44:15 -0400 Subject: [PATCH 56/72] xdrgen: Remove program_stat_to_errno() call sites Refactor: Translating an on-the-wire value to a local host errno is architecturally a job for the proc function, not the XDR decoder. Signed-off-by: Chuck Lever --- tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index 4ce4cc9fab79..aa9940e322db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,8 +13,6 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat) - return {{ program }}_stat_to_errno(result->stat); {% endif %} return 0; } From ce89e742a4c12b20f09a43fec1b21db33f2166cd Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 24 Oct 2024 09:55:20 +0800 Subject: [PATCH 57/72] svcrdma: fix miss destroy percpu_counter in svc_rdma_proc_init() There's issue as follows: RPC: Registered rdma transport module. RPC: Registered rdma backchannel transport module. RPC: Unregistered rdma transport module. RPC: Unregistered rdma backchannel transport module. BUG: unable to handle page fault for address: fffffbfff80c609a PGD 123fee067 P4D 123fee067 PUD 123fea067 PMD 10c624067 PTE 0 Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI RIP: 0010:percpu_counter_destroy_many+0xf7/0x2a0 Call Trace: __die+0x1f/0x70 page_fault_oops+0x2cd/0x860 spurious_kernel_fault+0x36/0x450 do_kern_addr_fault+0xca/0x100 exc_page_fault+0x128/0x150 asm_exc_page_fault+0x26/0x30 percpu_counter_destroy_many+0xf7/0x2a0 mmdrop+0x209/0x350 finish_task_switch.isra.0+0x481/0x840 schedule_tail+0xe/0xd0 ret_from_fork+0x23/0x80 ret_from_fork_asm+0x1a/0x30 If register_sysctl() return NULL, then svc_rdma_proc_cleanup() will not destroy the percpu counters which init in svc_rdma_proc_init(). If CONFIG_HOTPLUG_CPU is enabled, residual nodes may be in the 'percpu_counters' list. The above issue may occur once the module is removed. If the CONFIG_HOTPLUG_CPU configuration is not enabled, memory leakage occurs. To solve above issue just destroy all percpu counters when register_sysctl() return NULL. Fixes: 1e7e55731628 ("svcrdma: Restore read and write stats") Fixes: 22df5a22462e ("svcrdma: Convert rdma_stat_sq_starve to a per-CPU counter") Fixes: df971cd853c0 ("svcrdma: Convert rdma_stat_recv to a per-CPU counter") Signed-off-by: Ye Bin Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 58ae6ec4f25b..415c0310101f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -233,25 +233,34 @@ static int svc_rdma_proc_init(void) rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err; rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_read; rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_recv; rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_sq; svcrdma_table_header = register_sysctl("sunrpc/svc_rdma", svcrdma_parm_table); + if (!svcrdma_table_header) + goto err_write; + return 0; -out_err: +err_write: + rc = -ENOMEM; + percpu_counter_destroy(&svcrdma_stat_write); +err_sq: percpu_counter_destroy(&svcrdma_stat_sq_starve); +err_recv: percpu_counter_destroy(&svcrdma_stat_recv); +err_read: percpu_counter_destroy(&svcrdma_stat_read); +err: return rc; } From a2c0412c051ee279d338b2c288938e484ed9a6df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Oct 2024 09:10:42 +1100 Subject: [PATCH 58/72] nfsd: Don't fail OP_SETCLIENTID when there are too many clients. Failing OP_SETCLIENTID or OP_EXCHANGE_ID should only happen if there is memory allocation failure. Putting a hard limit on the number of clients is not really helpful as it will either happen too early and prevent clients that the server can easily handle, or too late and allow clients when the server is swamped. The calculated limit is still useful for expiring courtesy clients where there are "too many" clients, but it shouldn't prevent the creation of active clients. Testing of lots of clients against small-mem servers reports repeated NFS4ERR_DELAY responses which doesn't seem helpful. There may have been reports of similar problems in production use. Also remove an outdated comment - we do use a slab cache. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 133117b6c7cc..ddca893a8804 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2231,21 +2231,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) return 1; } -/* - * XXX Should we use a slab cache ? - * This type of memory management is somewhat inefficient, but we use it - * anyway since SETCLIENTID is not a common operation. - */ static struct nfs4_client *alloc_client(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client *clp; int i; - if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients && + atomic_read(&nn->nfsd_courtesy_clients) > 0) mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - return NULL; - } + clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; From 6a404f475f65c3b74799f4652dc15753834b3be0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 28 Oct 2024 09:04:43 +1100 Subject: [PATCH 59/72] nfsd: make use of warning provided by refcount_t refcount_t, by design, checks for unwanted situations and provides warnings. It is rarely useful to have explicit warnings with refcount usage. In this case we have an explicit warning if a refcount_t reaches zero when decremented. Simply using refcount_dec() will provide a similar warning and also mark the refcount_t as saturated to avoid any possible use-after-free. This patch drops the warning and uses refcount_dec() instead of refcount_dec_and_test(). Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1408166222c5..c16671135d17 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1050,7 +1050,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, * the last one however, since we should hold another. */ if (nfsd_file_lru_remove(nf)) - WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); + refcount_dec(&nf->nf_ref); goto wait_for_construction; } From 53f9ba78e07cb19f0895b9cf905fb08b70a126f4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Oct 2024 10:26:26 -0400 Subject: [PATCH 60/72] nfsd: remove nfsd4_session->se_bchannel This field is written and is never consulted again. Remove it. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 -- fs/nfsd/state.h | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ddca893a8804..a19324a68fb3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2002,8 +2002,6 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, } memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); - memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); - return new; out_free: while (i--) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index da22eaff3755..2485e62347bd 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -304,7 +304,6 @@ struct nfsd4_session { struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; - struct nfsd4_channel_attrs se_bchannel; struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; u32 se_cb_prog; From 10c93b5101ca61d03351da678b395b48678840c2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Oct 2024 10:48:46 -0400 Subject: [PATCH 61/72] nfsd: make nfsd4_session->se_flags a bool While this holds the flags from the CREATE_SESSION request, nothing ever consults them. The only flag used is NFS4_SESSION_DEAD. Make it a simple bool instead. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 6 +++--- fs/nfsd/state.h | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a19324a68fb3..a334f0a0e50a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -149,14 +149,14 @@ void nfsd4_destroy_laundry_wq(void) static bool is_session_dead(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + return ses->se_dead; } static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; - ses->se_flags |= NFS4_SESSION_DEAD; + ses->se_dead = true; return nfs_ok; } @@ -2133,7 +2133,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru INIT_LIST_HEAD(&new->se_conns); new->se_cb_seq_nr = 1; - new->se_flags = cses->flags; + new->se_dead = false; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; atomic_set(&new->se_ref, 0); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2485e62347bd..2f735492cf6c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -296,11 +296,9 @@ struct nfsd4_conn { */ struct nfsd4_session { atomic_t se_ref; + bool se_dead; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; -/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ -#define NFS4_SESSION_DEAD 0x010 - u32 se_flags; struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; From a4452e661bc8ee56b2a893df6f523607c63f6de8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:02 -0400 Subject: [PATCH 62/72] NFSD: Add a tracepoint to record canceled async COPY operations Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/trace.h | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 94cc7b2ac385..ca383ebc1790 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1287,6 +1287,7 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) static void nfsd4_stop_copy(struct nfsd4_copy *copy) { + trace_nfsd_copy_async_cancel(copy); if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f318898cfc31..6b7bf8129e49 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2244,7 +2244,7 @@ TRACE_EVENT(nfsd_copy_done, ) ); -TRACE_EVENT(nfsd_copy_async_done, +DECLARE_EVENT_CLASS(nfsd_copy_async_done_class, TP_PROTO( const struct nfsd4_copy *copy ), @@ -2313,6 +2313,15 @@ TRACE_EVENT(nfsd_copy_async_done, ) ); +#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_async_done_class, \ + nfsd_copy_async_##name, \ + TP_PROTO(const struct nfsd4_copy *copy), \ + TP_ARGS(copy)) + +DEFINE_COPY_ASYNC_DONE_EVENT(done); +DEFINE_COPY_ASYNC_DONE_EVENT(cancel); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH From 62a8642ba00aa8ceb0a02ade942f5ec52e877c95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:03 -0400 Subject: [PATCH 63/72] NFSD: Fix nfsd4_shutdown_copy() nfsd4_shutdown_copy() is just this: while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); nfsd4_get_copy() bumps @copy's reference count, preventing nfsd4_stop_copy() from releasing @copy. A while loop like this usually works by removing the first element of the list, but neither nfsd4_get_copy() nor nfsd4_stop_copy() alters the async_copies list. Best I can tell, then, is that nfsd4_shutdown_copy() continues to loop until other threads manage to remove all the items from this list. The spinning loop blocks shutdown until these items are gone. Possibly the reason we haven't seen this issue in the field is because client_has_state() prevents __destroy_client() from calling nfsd4_shutdown_copy() if there are any items on this list. In a subsequent patch I plan to remove that restriction. Fixes: e0639dc5805a ("NFSD introduce async copy feature") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ca383ebc1790..5ad55b734541 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1293,7 +1293,7 @@ static void nfsd4_stop_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } -static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) +static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy = NULL; @@ -1302,6 +1302,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) copy = list_first_entry(&clp->async_copies, struct nfsd4_copy, copies); refcount_inc(©->refcount); + copy->cp_clp = NULL; + if (!list_empty(©->copies)) + list_del_init(©->copies); } spin_unlock(&clp->async_lock); return copy; @@ -1311,7 +1314,7 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy; - while ((copy = nfsd4_get_copy(clp)) != NULL) + while ((copy = nfsd4_unhash_copy(clp)) != NULL) nfsd4_stop_copy(copy); } #ifdef CONFIG_NFSD_V4_2_INTER_SSC From 409d6f52bd6b4fb43fbb4db9daeb5022e2e1d00c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:04 -0400 Subject: [PATCH 64/72] NFSD: Free async copy information in nfsd4_cb_offload_release() RFC 7862 Section 4.8 states: > A copy offload stateid will be valid until either (A) the client > or server restarts or (B) the client returns the resource by > issuing an OFFLOAD_CANCEL operation or the client replies to a > CB_OFFLOAD operation. Currently, NFSD purges the metadata for an async COPY operation as soon as the CB_OFFLOAD callback has been sent. It does not wait even for the client's CB_OFFLOAD response, as the paragraph above suggests that it should. This makes the OFFLOAD_STATUS operation ineffective during the window between the completion of an asynchronous COPY and the server's receipt of the corresponding CB_OFFLOAD response. This is important if, for example, the client responds with NFS4ERR_DELAY, or the transport is lost before the server receives the response. A client might use OFFLOAD_STATUS to query the server about the still pending asynchronous COPY, but NFSD will respond to OFFLOAD_STATUS as if it had never heard of the presented copy stateid. This patch starts to address this issue by extending the lifetime of struct nfsd4_copy at least until the server has seen the client's CB_OFFLOAD response, or the CB_OFFLOAD has timed out. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 17 ++++++++++------- fs/nfsd/xdr4.h | 3 +++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5ad55b734541..5c1013141095 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644); MODULE_PARM_DESC(inter_copy_offload_enable, "Enable inter server to server copy offload. Default: false"); +static void cleanup_async_copy(struct nfsd4_copy *copy); + #ifdef CONFIG_NFSD_V4_2_INTER_SSC static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ module_param(nfsd4_ssc_umount_timeout, int, 0644); @@ -1602,8 +1604,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { struct nfsd4_cb_offload *cbo = container_of(cb, struct nfsd4_cb_offload, co_cb); + struct nfsd4_copy *copy = + container_of(cbo, struct nfsd4_copy, cp_cb_offload); - kfree(cbo); + cleanup_async_copy(copy); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, @@ -1736,11 +1740,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) { - struct nfsd4_cb_offload *cbo; - - cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); - if (!cbo) - return; + struct nfsd4_cb_offload *cbo = ©->cp_cb_offload; memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); @@ -1790,10 +1790,13 @@ static int nfsd4_do_async_copy(void *data) } do_callback: + /* The kthread exits forthwith. Ensure that a subsequent + * OFFLOAD_CANCEL won't try to kill it again. */ + set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags); + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); trace_nfsd_copy_async_done(copy); nfsd4_send_cb_offload(copy); - cleanup_async_copy(copy); return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index d5bb9a3c2da4..08e5d280f887 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -700,6 +700,9 @@ struct nfsd4_copy { struct nfsd42_write_res cp_res; struct knfsd_fh fh; + /* offload callback */ + struct nfsd4_cb_offload cp_cb_offload; + struct nfs4_client *cp_clp; struct nfsd_file *nf_src; From 5c41f321470a5400641d3a271014ddd9b9868373 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:05 -0400 Subject: [PATCH 65/72] NFSD: Handle an NFS4ERR_DELAY response to CB_OFFLOAD RFC 7862 permits callback services to respond to CB_OFFLOAD with NFS4ERR_DELAY. Currently NFSD drops the CB_OFFLOAD in that case. To improve the reliability of COPY offload, NFSD should rather send another CB_OFFLOAD completion notification. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 8 ++++++++ fs/nfsd/xdr4.h | 1 + 2 files changed, 9 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5c1013141095..54db3979605f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1617,6 +1617,13 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, container_of(cb, struct nfsd4_cb_offload, co_cb); trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + if (cbo->co_retries--) { + rpc_delay(task, 1 * HZ); + return 0; + } + } return 1; } @@ -1745,6 +1752,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); cbo->co_nfserr = copy->nfserr; + cbo->co_retries = 5; nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 08e5d280f887..550f7e064f2b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -676,6 +676,7 @@ struct nfsd4_cb_offload { struct nfsd4_callback co_cb; struct nfsd42_write_res co_res; __be32 co_nfserr; + unsigned int co_retries; struct knfsd_fh co_fh; }; From b44ffa4c4f57ffe8a0967963538689fed169f1c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:06 -0400 Subject: [PATCH 66/72] NFSD: Block DESTROY_CLIENTID only when there are ongoing async COPY operations Currently __destroy_client() consults the nfs4_client's async_copies list to determine whether there are ongoing async COPY operations. However, NFSD now keeps copy state in that list even when the async copy has completed, to enable OFFLOAD_STATUS to find the COPY results for a while after the COPY has completed. DESTROY_CLIENTID should not be blocked if the client's async_copies list contains state for only completed copy operations. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 30 ++++++++++++++++++++++++++++++ fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 54db3979605f..67349eca55b1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1278,6 +1278,36 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } +/** + * nfsd4_has_active_async_copies - Check for ongoing copy operations + * @clp: Client to be checked + * + * NFSD maintains state for async COPY operations after they complete, + * and this state remains in the nfs4_client's async_copies list. + * Ongoing copies should block the destruction of the nfs4_client, but + * completed copies should not. + * + * Return values: + * %true: At least one active async COPY is ongoing + * %false: No active async COPY operations were found + */ +bool nfsd4_has_active_async_copies(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy; + bool result = false; + + spin_lock(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { + if (!test_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags) && + !test_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) { + result = true; + break; + } + } + spin_unlock(&clp->async_lock); + return result; +} + static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a334f0a0e50a..b8bedc6a157f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3472,7 +3472,7 @@ static bool client_has_state(struct nfs4_client *clp) #endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions) - || !list_empty(&clp->async_copies); + || nfsd4_has_active_async_copies(clp); } static __be32 copy_impl_id(struct nfs4_client *clp, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2f735492cf6c..4dd7ed7ae052 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -738,6 +738,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +bool nfsd4_has_active_async_copies(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); From ac0514f4d198b5d1d5ba367b122cdf5a68e711d4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:07 -0400 Subject: [PATCH 67/72] NFSD: Add a laundromat reaper for async copy state RFC 7862 Section 4.8 states: > A copy offload stateid will be valid until either (A) the client > or server restarts or (B) the client returns the resource by > issuing an OFFLOAD_CANCEL operation or the client replies to a > CB_OFFLOAD operation. Instead of releasing async copy state when the CB_OFFLOAD callback completes, now let it live until the next laundromat run after the callback completes. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 35 ++++++++++++++++++++++++++++++++++- fs/nfsd/nfs4state.c | 1 + fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 1 + 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 67349eca55b1..dfc118bf9aa5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1308,6 +1308,39 @@ bool nfsd4_has_active_async_copies(struct nfs4_client *clp) return result; } +/** + * nfsd4_async_copy_reaper - Purge completed copies + * @nn: Network namespace with possible active copy information + */ +void nfsd4_async_copy_reaper(struct nfsd_net *nn) +{ + struct nfs4_client *clp; + struct nfsd4_copy *copy; + LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + struct list_head *pos, *next; + + spin_lock(&clp->async_lock); + list_for_each_safe(pos, next, &clp->async_copies) { + copy = list_entry(pos, struct nfsd4_copy, copies); + if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { + list_del_init(©->copies); + list_add(©->copies, &reaplist); + } + } + spin_unlock(&clp->async_lock); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&reaplist)) { + copy = list_first_entry(&reaplist, struct nfsd4_copy, copies); + list_del_init(©->copies); + cleanup_async_copy(copy); + } +} + static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) @@ -1637,7 +1670,7 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) struct nfsd4_copy *copy = container_of(cbo, struct nfsd4_copy, cp_cb_offload); - cleanup_async_copy(copy); + set_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b8bedc6a157f..10aa7732e914 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6574,6 +6574,7 @@ nfs4_laundromat(struct nfsd_net *nn) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, <); nfs4_process_client_reaplist(&reaplist); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4dd7ed7ae052..dcbebd53e5f4 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -738,6 +738,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +void nfsd4_async_copy_reaper(struct nfsd_net *nn); bool nfsd4_has_active_async_copies(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 550f7e064f2b..5951360f6d03 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -695,6 +695,7 @@ struct nfsd4_copy { #define NFSD4_COPY_F_SYNCHRONOUS (2) #define NFSD4_COPY_F_COMMITTED (3) #define NFSD4_COPY_F_COMPLETED (4) +#define NFSD4_COPY_F_OFFLOAD_DONE (5) /* response */ __be32 nfserr; From aa0ebd21df9c90a69f8bf00e3fa49d476be8e290 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:08 -0400 Subject: [PATCH 68/72] NFSD: Add nfsd4_copy time-to-live Keep async copy state alive for a few lease cycles after the copy completes so that OFFLOAD_STATUS returns something meaningful. This means that NFSD's client shutdown processing needs to purge any of this state that happens to be waiting to die. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 +++++-- fs/nfsd/state.h | 15 +++++++++++++++ fs/nfsd/xdr4.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dfc118bf9aa5..f8a10f90bc7a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1326,8 +1326,10 @@ void nfsd4_async_copy_reaper(struct nfsd_net *nn) list_for_each_safe(pos, next, &clp->async_copies) { copy = list_entry(pos, struct nfsd4_copy, copies); if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { - list_del_init(©->copies); - list_add(©->copies, &reaplist); + if (--copy->cp_ttl) { + list_del_init(©->copies); + list_add(©->copies, &reaplist); + } } } spin_unlock(&clp->async_lock); @@ -1921,6 +1923,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy->cp_nn = nn; INIT_LIST_HEAD(&async_copy->copies); refcount_set(&async_copy->refcount, 1); + async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL; /* Arbitrary cap on number of pending async copy operations */ if (atomic_inc_return(&nn->pending_async_copies) > (int)rqstp->rq_pool->sp_nrthreads) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dcbebd53e5f4..8092894e8f3c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -137,6 +137,21 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; +/* + * RFC 7862 Section 4.8 states: + * + * | A copy offload stateid will be valid until either (A) the client + * | or server restarts or (B) the client returns the resource by + * | issuing an OFFLOAD_CANCEL operation or the client replies to a + * | CB_OFFLOAD operation. + * + * Because a client might not reply to a CB_OFFLOAD, or a reply + * might get lost due to connection loss, NFSD purges async copy + * state after a short period to prevent it from accumulating + * over time. + */ +#define NFSD_COPY_INITIAL_TTL 10 + struct nfs4_cb_fattr { struct nfsd4_callback ncf_getattr; u32 ncf_cb_status; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 5951360f6d03..382cc1389396 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -715,6 +715,7 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; + unsigned int cp_ttl; struct nfsd4_ssc_umount_item *ss_nsui; struct nfs_fh c_fh; From 98100e88dd8865999dc6379a3356cd799795fe7b Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Tue, 5 Nov 2024 19:03:14 +0800 Subject: [PATCH 69/72] nfsd: fix nfs4_openowner leak when concurrent nfsd4_open occur The action force umount(umount -f) will attempt to kill all rpc_task even umount operation may ultimately fail if some files remain open. Consequently, if an action attempts to open a file, it can potentially send two rpc_task to nfs server. NFS CLIENT thread1 thread2 open("file") ... nfs4_do_open _nfs4_do_open _nfs4_open_and_get_state _nfs4_proc_open nfs4_run_open_task /* rpc_task1 */ rpc_run_task rpc_wait_for_completion_task umount -f nfs_umount_begin rpc_killall_tasks rpc_signal_task rpc_task1 been wakeup and return -512 _nfs4_do_open // while loop ... nfs4_run_open_task /* rpc_task2 */ rpc_run_task rpc_wait_for_completion_task While processing an open request, nfsd will first attempt to find or allocate an nfs4_openowner. If it finds an nfs4_openowner that is not marked as NFS4_OO_CONFIRMED, this nfs4_openowner will released. Since two rpc_task can attempt to open the same file simultaneously from the client to server, and because two instances of nfsd can run concurrently, this situation can lead to lots of memory leak. Additionally, when we echo 0 to /proc/fs/nfsd/threads, warning will be triggered. NFS SERVER nfsd1 nfsd2 echo 0 > /proc/fs/nfsd/threads nfsd4_open nfsd4_process_open1 find_or_alloc_open_stateowner // alloc oo1, stateid1 nfsd4_open nfsd4_process_open1 find_or_alloc_open_stateowner // find oo1, without NFS4_OO_CONFIRMED release_openowner unhash_openowner_locked list_del_init(&oo->oo_perclient) // cannot find this oo // from client, LEAK!!! alloc_stateowner // alloc oo2 nfsd4_process_open2 init_open_stateid // associate oo1 // with stateid1, stateid1 LEAK!!! nfs4_get_vfs_file // alloc nfsd_file1 and nfsd_file_mark1 // all LEAK!!! nfsd4_process_open2 ... write_threads ... nfsd_destroy_serv nfsd_shutdown_net nfs4_state_shutdown_net nfs4_state_destroy_net destroy_client __destroy_client // won't find oo1!!! nfsd_shutdown_generic nfsd_file_cache_shutdown kmem_cache_destroy for nfsd_file_slab and nfsd_file_mark_slab // bark since nfsd_file1 // and nfsd_file_mark1 // still alive ======================================================================= BUG nfsd_file (Not tainted): Objects remaining in nfsd_file on __kmem_cache_shutdown() ----------------------------------------------------------------------- Slab 0xffd4000004438a80 objects=34 used=1 fp=0xff11000110e2ad28 flags=0x17ffffc0000240(workingset|head|node=0|zone=2|lastcpupid=0x1fffff) CPU: 4 UID: 0 PID: 757 Comm: sh Not tainted 6.12.0-rc6+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 slab_err+0xb0/0xf0 __kmem_cache_shutdown+0x15c/0x310 kmem_cache_destroy+0x66/0x160 nfsd_file_cache_shutdown+0xac/0x210 [nfsd] nfsd_destroy_serv+0x251/0x2a0 [nfsd] nfsd_svc+0x125/0x1e0 [nfsd] write_threads+0x16a/0x2a0 [nfsd] nfsctl_transaction_write+0x74/0xa0 [nfsd] vfs_write+0x1ae/0x6d0 ksys_write+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Disabling lock debugging due to kernel taint Object 0xff11000110e2ac38 @offset=3128 Allocated in nfsd_file_do_acquire+0x20f/0xa30 [nfsd] age=1635 cpu=3 pid=800 nfsd_file_do_acquire+0x20f/0xa30 [nfsd] nfsd_file_acquire_opened+0x5f/0x90 [nfsd] nfs4_get_vfs_file+0x4c9/0x570 [nfsd] nfsd4_process_open2+0x713/0x1070 [nfsd] nfsd4_open+0x74b/0x8b0 [nfsd] nfsd4_proc_compound+0x70b/0xc20 [nfsd] nfsd_dispatch+0x1b4/0x3a0 [nfsd] svc_process_common+0x5b8/0xc50 [sunrpc] svc_process+0x2ab/0x3b0 [sunrpc] svc_handle_xprt+0x681/0xa20 [sunrpc] nfsd+0x183/0x220 [nfsd] kthread+0x199/0x1e0 ret_from_fork+0x31/0x60 ret_from_fork_asm+0x1a/0x30 Add nfs4_openowner_unhashed to help found unhashed nfs4_openowner, and break nfsd4_open process to fix this problem. Cc: stable@vger.kernel.org # v5.4+ Reviewed-by: Jeff Layton Signed-off-by: Yang Erkun Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 10aa7732e914..2cafe90a3328 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1652,6 +1652,14 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) free_ol_stateid_reaplist(&reaplist); } +static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo) +{ + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + return list_empty(&oo->oo_owner.so_strhash) && + list_empty(&oo->oo_perclient); +} + static void unhash_openowner_locked(struct nfs4_openowner *oo) { struct nfs4_client *clp = oo->oo_owner.so_client; @@ -4988,6 +4996,12 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); + if (nfs4_openowner_unhashed(oo)) { + mutex_unlock(&stp->st_mutex); + stp = NULL; + goto out_unlock; + } + retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; @@ -6139,6 +6153,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (!stp) { stp = init_open_stateid(fp, open); + if (!stp) { + status = nfserr_jukebox; + goto out; + } + if (!open->op_stp) new_stp = true; } From 07442ec85bded6692f2e5909f16ab8bbc86fb3be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Nov 2024 21:35:24 +0000 Subject: [PATCH 70/72] nfsd: get rid of include ../internal.h added back in 2015 for the sake of vfs_clone_file_range(), which is in linux/fs.h these days Signed-off-by: Al Viro Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eee39bab49a8..29cb7b812d71 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -35,7 +35,6 @@ #include "xdr3.h" #ifdef CONFIG_NFSD_V4 -#include "../internal.h" #include "acl.h" #include "idmap.h" #include "xdr4.h" From c840b8e1f039e90f97ca55525667eb961422f86c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 13 Nov 2024 22:59:38 -0500 Subject: [PATCH 71/72] nfs_common: must not hold RCU while calling nfsd_file_put_local Move holding the RCU from nfs_to_nfsd_file_put_local to nfs_to_nfsd_net_put. It is the call to nfs_to->nfsd_serv_put that requires the RCU anyway (the puts for nfsd_file and netns were combined to avoid an extra indirect reference but that micro-optimization isn't possible now). This fixes xfstests generic/013 and it triggering: "Voluntary context switch within RCU read-side critical section!" [ 143.545738] Call Trace: [ 143.546206] [ 143.546625] ? show_regs+0x6d/0x80 [ 143.547267] ? __warn+0x91/0x140 [ 143.547951] ? rcu_note_context_switch+0x496/0x5d0 [ 143.548856] ? report_bug+0x193/0x1a0 [ 143.549557] ? handle_bug+0x63/0xa0 [ 143.550214] ? exc_invalid_op+0x1d/0x80 [ 143.550938] ? asm_exc_invalid_op+0x1f/0x30 [ 143.551736] ? rcu_note_context_switch+0x496/0x5d0 [ 143.552634] ? wakeup_preempt+0x62/0x70 [ 143.553358] __schedule+0xaa/0x1380 [ 143.554025] ? _raw_spin_unlock_irqrestore+0x12/0x40 [ 143.554958] ? try_to_wake_up+0x1fe/0x6b0 [ 143.555715] ? wake_up_process+0x19/0x20 [ 143.556452] schedule+0x2e/0x120 [ 143.557066] schedule_preempt_disabled+0x19/0x30 [ 143.557933] rwsem_down_read_slowpath+0x24d/0x4a0 [ 143.558818] ? xfs_efi_item_format+0x50/0xc0 [xfs] [ 143.559894] down_read+0x4e/0xb0 [ 143.560519] xlog_cil_commit+0x1b2/0xbc0 [xfs] [ 143.561460] ? _raw_spin_unlock+0x12/0x30 [ 143.562212] ? xfs_inode_item_precommit+0xc7/0x220 [xfs] [ 143.563309] ? xfs_trans_run_precommits+0x69/0xd0 [xfs] [ 143.564394] __xfs_trans_commit+0xb5/0x330 [xfs] [ 143.565367] xfs_trans_roll+0x48/0xc0 [xfs] [ 143.566262] xfs_defer_trans_roll+0x57/0x100 [xfs] [ 143.567278] xfs_defer_finish_noroll+0x27a/0x490 [xfs] [ 143.568342] xfs_defer_finish+0x1a/0x80 [xfs] [ 143.569267] xfs_bunmapi_range+0x4d/0xb0 [xfs] [ 143.570208] xfs_itruncate_extents_flags+0x13d/0x230 [xfs] [ 143.571353] xfs_free_eofblocks+0x12e/0x190 [xfs] [ 143.572359] xfs_file_release+0x12d/0x140 [xfs] [ 143.573324] __fput+0xe8/0x2d0 [ 143.573922] __fput_sync+0x1d/0x30 [ 143.574574] nfsd_filp_close+0x33/0x60 [nfsd] [ 143.575430] nfsd_file_free+0x96/0x150 [nfsd] [ 143.576274] nfsd_file_put+0xf7/0x1a0 [nfsd] [ 143.577104] nfsd_file_put_local+0x18/0x30 [nfsd] [ 143.578070] nfs_close_local_fh+0x101/0x110 [nfs_localio] [ 143.579079] __put_nfs_open_context+0xc9/0x180 [nfs] [ 143.580031] nfs_file_clear_open_context+0x4a/0x60 [nfs] [ 143.581038] nfs_file_release+0x3e/0x60 [nfs] [ 143.581879] __fput+0xe8/0x2d0 [ 143.582464] __fput_sync+0x1d/0x30 [ 143.583108] __x64_sys_close+0x41/0x80 [ 143.583823] x64_sys_call+0x189a/0x20d0 [ 143.584552] do_syscall_64+0x64/0x170 [ 143.585240] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 143.586185] RIP: 0033:0x7f3c5153efd7 Fixes: 65f2a5c36635 ("nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put()") Signed-off-by: Mike Snitzer Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfs_common/nfslocalio.c | 8 +++----- fs/nfsd/filecache.c | 14 +++++++------- fs/nfsd/filecache.h | 2 +- include/linux/nfslocalio.h | 18 +++++++++++++++--- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 09404d142d1a..a74ec08f6c96 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -155,11 +155,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, /* We have an implied reference to net thanks to nfsd_serv_try_get */ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, nfs_fh, fmode); - if (IS_ERR(localio)) { - rcu_read_lock(); - nfs_to->nfsd_serv_put(net); - rcu_read_unlock(); - } + if (IS_ERR(localio)) + nfs_to_nfsd_net_put(net); + return localio; } EXPORT_SYMBOL_GPL(nfs_open_local_fh); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c16671135d17..9a62b4da89bb 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -391,19 +391,19 @@ nfsd_file_put(struct nfsd_file *nf) } /** - * nfsd_file_put_local - put the reference to nfsd_file and local nfsd_serv - * @nf: nfsd_file of which to put the references + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller + * @nf: nfsd_file of which to put the reference * - * First put the reference of the nfsd_file and then put the - * reference to the associated nn->nfsd_serv. + * First save the associated net to return to caller, then put + * the reference of the nfsd_file. */ -void -nfsd_file_put_local(struct nfsd_file *nf) __must_hold(rcu) +struct net * +nfsd_file_put_local(struct nfsd_file *nf) { struct net *net = nf->nf_net; nfsd_file_put(nf); - nfsd_serv_put(net); + return net; } /** diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index cadf3c2689c4..d5db6b34ba30 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -55,7 +55,7 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -void nfsd_file_put_local(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 3982fea79919..9202f4b24343 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -55,7 +55,7 @@ struct nfsd_localio_operations { const struct cred *, const struct nfs_fh *, const fmode_t); - void (*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -66,7 +66,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, const fmode_t); -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_net_put(struct net *net) { /* * Once reference to nfsd_serv is dropped, NFSD could be @@ -74,10 +74,22 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) * by always taking RCU. */ rcu_read_lock(); - nfs_to->nfsd_file_put_local(localio); + nfs_to->nfsd_serv_put(net); rcu_read_unlock(); } +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ + /* + * Must not hold RCU otherwise nfsd_file_put() can easily trigger: + * "Voluntary context switch within RCU read-side critical section!" + * by scheduling deep in underlying filesystem (e.g. XFS). + */ + struct net *net = nfs_to->nfsd_file_put_local(localio); + + nfs_to_nfsd_net_put(net); +} + #else /* CONFIG_NFS_LOCALIO */ static inline void nfsd_localio_ops_init(void) { From 583772eec7b0096516a8ee8b1cc31401894f1e3a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Nov 2024 09:54:34 -0500 Subject: [PATCH 72/72] nfsd: allow for up to 32 callback session slots nfsd currently only uses a single slot in the callback channel, which is proving to be a bottleneck in some cases. Widen the callback channel to a max of 32 slots (subject to the client's target_maxreqs value). Change the cb_holds_slot boolean to an integer that tracks the current slot number (with -1 meaning "unassigned"). Move the callback slot tracking info into the session. Add a new u32 that acts as a bitmap to track which slots are in use, and a u32 to track the latest callback target_slotid that the client reports. To protect the new fields, add a new per-session spinlock (the se_lock). Fix nfsd41_cb_get_slot to always search for the lowest slotid (using ffs()). Finally, convert the session->se_cb_seq_nr field into an array of ints and add the necessary handling to ensure that the seqids get reset when the slot table grows after shrinking. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 118 ++++++++++++++++++++++++++++++----------- fs/nfsd/nfs4state.c | 14 +++-- fs/nfsd/state.h | 15 +++--- fs/nfsd/trace.h | 2 +- 4 files changed, 109 insertions(+), 40 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1ffac2b32d81..3877b53e429f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -374,6 +374,19 @@ encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, hdr->nops++; } +static u32 highest_slotid(struct nfsd4_session *ses) +{ + u32 idx; + + spin_lock(&ses->se_lock); + idx = fls(~ses->se_cb_slot_avail); + if (idx > 0) + --idx; + idx = max(idx, ses->se_cb_highest_slot); + spin_unlock(&ses->se_lock); + return idx; +} + /* * CB_SEQUENCE4args * @@ -400,15 +413,40 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr, encode_sessionid4(xdr, session); p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4); - *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */ - *p++ = xdr_zero; /* csa_slotid */ - *p++ = xdr_zero; /* csa_highest_slotid */ + *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */ + *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */ + *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */ *p++ = xdr_zero; /* csa_cachethis */ xdr_encode_empty_array(p); /* csa_referring_call_lists */ hdr->nops++; } +static void update_cb_slot_table(struct nfsd4_session *ses, u32 target) +{ + /* No need to do anything if nothing changed */ + if (likely(target == READ_ONCE(ses->se_cb_highest_slot))) + return; + + spin_lock(&ses->se_lock); + if (target > ses->se_cb_highest_slot) { + int i; + + target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1); + + /* + * Growing the slot table. Reset any new sequences to 1. + * + * NB: There is some debate about whether the RFC requires this, + * but the Linux client expects it. + */ + for (i = ses->se_cb_highest_slot + 1; i <= target; ++i) + ses->se_cb_seq_nr[i] = 1; + } + ses->se_cb_highest_slot = target; + spin_unlock(&ses->se_lock); +} + /* * CB_SEQUENCE4resok * @@ -436,7 +474,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, struct nfsd4_session *session = cb->cb_clp->cl_cb_session; int status = -ESERVERFAULT; __be32 *p; - u32 dummy; + u32 seqid, slotid, target; /* * If the server returns different values for sessionID, slotID or @@ -452,21 +490,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, } p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - dummy = be32_to_cpup(p++); - if (dummy != session->se_cb_seq_nr) { + seqid = be32_to_cpup(p++); + if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) { dprintk("NFS: %s Invalid sequence number\n", __func__); goto out; } - dummy = be32_to_cpup(p++); - if (dummy != 0) { + slotid = be32_to_cpup(p++); + if (slotid != cb->cb_held_slot) { dprintk("NFS: %s Invalid slotid\n", __func__); goto out; } - /* - * FIXME: process highest slotid and target highest slotid - */ + p++; // ignore current highest slot value + + target = be32_to_cpup(p++); + update_cb_slot_table(session, target); status = 0; out: cb->cb_seq_status = status; @@ -1167,6 +1206,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) spin_unlock(&clp->cl_lock); } +static int grab_slot(struct nfsd4_session *ses) +{ + int idx; + + spin_lock(&ses->se_lock); + idx = ffs(ses->se_cb_slot_avail) - 1; + if (idx < 0 || idx > ses->se_cb_highest_slot) { + spin_unlock(&ses->se_lock); + return -1; + } + /* clear the bit for the slot */ + ses->se_cb_slot_avail &= ~BIT(idx); + spin_unlock(&ses->se_lock); + return idx; +} + /* * There's currently a single callback channel slot. * If the slot is available, then mark it busy. Otherwise, set the @@ -1175,28 +1230,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = clp->cl_cb_session; - if (!cb->cb_holds_slot && - test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + if (cb->cb_held_slot >= 0) + return true; + cb->cb_held_slot = grab_slot(ses); + if (cb->cb_held_slot < 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); /* Race breaker */ - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { - dprintk("%s slot is busy\n", __func__); + cb->cb_held_slot = grab_slot(ses); + if (cb->cb_held_slot < 0) return false; - } rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } - cb->cb_holds_slot = true; return true; } static void nfsd41_cb_release_slot(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = clp->cl_cb_session; - if (cb->cb_holds_slot) { - cb->cb_holds_slot = false; - clear_bit(0, &clp->cl_cb_slot_busy); + if (cb->cb_held_slot >= 0) { + spin_lock(&ses->se_lock); + ses->se_cb_slot_avail |= BIT(cb->cb_held_slot); + spin_unlock(&ses->se_lock); + cb->cb_held_slot = -1; rpc_wake_up_next(&clp->cl_cb_waitq); } } @@ -1213,8 +1272,8 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb) } /* - * TODO: cb_sequence should support referring call lists, cachethis, multiple - * slots, and mark callback channel down on communication errors. + * TODO: cb_sequence should support referring call lists, cachethis, + * and mark callback channel down on communication errors. */ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { @@ -1256,7 +1315,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback return true; } - if (!cb->cb_holds_slot) + if (cb->cb_held_slot < 0) goto need_restart; /* This is the operation status code for CB_SEQUENCE */ @@ -1270,10 +1329,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback * If CB_SEQUENCE returns an error, then the state of the slot * (sequence ID, cached reply) MUST NOT change. */ - ++session->se_cb_seq_nr; + ++session->se_cb_seq_nr[cb->cb_held_slot]; break; case -ESERVERFAULT: - ++session->se_cb_seq_nr; + ++session->se_cb_seq_nr[cb->cb_held_slot]; nfsd4_mark_cb_fault(cb->cb_clp); ret = false; break; @@ -1299,17 +1358,16 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback case -NFS4ERR_BADSLOT: goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: - if (session->se_cb_seq_nr != 1) { - session->se_cb_seq_nr = 1; + if (session->se_cb_seq_nr[cb->cb_held_slot] != 1) { + session->se_cb_seq_nr[cb->cb_held_slot] = 1; goto retry_nowait; } break; default: nfsd4_mark_cb_fault(cb->cb_clp); } - nfsd41_cb_release_slot(cb); - trace_nfsd_cb_free_slot(task, cb); + nfsd41_cb_release_slot(cb); if (RPC_SIGNALLED(task)) goto need_restart; @@ -1529,7 +1587,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; cb->cb_need_restart = false; - cb->cb_holds_slot = false; + cb->cb_held_slot = -1; } /** diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2cafe90a3328..4c0aa17c5c46 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2010,6 +2010,10 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, } memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + new->se_cb_slot_avail = ~0U; + new->se_cb_highest_slot = min(battrs->maxreqs - 1, + NFSD_BC_SLOT_TABLE_SIZE - 1); + spin_lock_init(&new->se_lock); return new; out_free: while (i--) @@ -2140,11 +2144,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru INIT_LIST_HEAD(&new->se_conns); - new->se_cb_seq_nr = 1; + atomic_set(&new->se_ref, 0); new->se_dead = false; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; - atomic_set(&new->se_ref, 0); + + for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx) + new->se_cb_seq_nr[idx] = 1; + idx = hash_sessionid(&new->se_sessionid); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); @@ -3153,7 +3160,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = ktime_get_boottime_seconds(); - clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; @@ -3935,6 +3941,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; + /* Report the correct number of backchannel slots */ + cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1; init_session(rqstp, new, conf, cr_ses); nfsd4_get_session_locked(new); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 8092894e8f3c..e16bb3717fb9 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -71,8 +71,8 @@ struct nfsd4_callback { struct work_struct cb_work; int cb_seq_status; int cb_status; + int cb_held_slot; bool cb_need_restart; - bool cb_holds_slot; }; struct nfsd4_callback_ops { @@ -304,6 +304,9 @@ struct nfsd4_conn { unsigned char cn_flags; }; +/* Maximum number of slots that nfsd will use in the backchannel */ +#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8) + /* * Representation of a v4.1+ session. These are refcounted in a similar fashion * to the nfs4_client. References are only taken when the server is actively @@ -311,6 +314,10 @@ struct nfsd4_conn { */ struct nfsd4_session { atomic_t se_ref; + spinlock_t se_lock; + u32 se_cb_slot_avail; /* bitmap of available slots */ + u32 se_cb_highest_slot; /* highest slot client wants */ + u32 se_cb_prog; bool se_dead; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; @@ -319,8 +326,7 @@ struct nfsd4_session { struct nfsd4_channel_attrs se_fchannel; struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; - u32 se_cb_prog; - u32 se_cb_seq_nr; + u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE]; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; @@ -454,9 +460,6 @@ struct nfs4_client { */ struct dentry *cl_nfsd_info_dentry; - /* for nfs41 callbacks */ - /* We currently support a single back channel with a single slot */ - unsigned long cl_cb_slot_busy; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ struct net *net; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 6b7bf8129e49..696c89f68a9e 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1697,7 +1697,7 @@ TRACE_EVENT(nfsd_cb_free_slot, __entry->cl_id = sid->clientid.cl_id; __entry->seqno = sid->sequence; __entry->reserved = sid->reserved; - __entry->slot_seqno = session->se_cb_seq_nr; + __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot]; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",