From e9cdebbe23f1aa9a1caea169862f479ab3fa2773 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 6 Nov 2023 15:24:38 -0600 Subject: [PATCH 1/6] dlm: use kernel_connect() and kernel_bind() Recent changes to kernel_connect() and kernel_bind() ensure that callers are insulated from changes to the address parameter made by BPF SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and ops->bind() with kernel_connect() and kernel_bind() to protect callers in such cases. Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind") Cc: stable@vger.kernel.org Signed-off-by: Jordan Rife Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 67f8dd8a05ef..6296c62c10fa 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1817,8 +1817,8 @@ static int dlm_tcp_bind(struct socket *sock) memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); - result = sock->ops->bind(sock, (struct sockaddr *)&src_addr, - addr_len); + result = kernel_bind(sock, (struct sockaddr *)&src_addr, + addr_len); if (result < 0) { /* This *may* not indicate a critical error */ log_print("could not bind for connect: %d", result); @@ -1830,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock) static int dlm_tcp_connect(struct connection *con, struct socket *sock, struct sockaddr *addr, int addr_len) { - return sock->ops->connect(sock, addr, addr_len, O_NONBLOCK); + return kernel_connect(sock, addr, addr_len, O_NONBLOCK); } static int dlm_tcp_listen_validate(void) @@ -1862,8 +1862,8 @@ static int dlm_tcp_listen_bind(struct socket *sock) /* Bind to our port */ make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); - return sock->ops->bind(sock, (struct sockaddr *)&dlm_local_addr[0], - addr_len); + return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1888,12 +1888,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock, int ret; /* - * Make sock->ops->connect() function return in specified time, + * Make kernel_connect() function return in specified time, * since O_NONBLOCK argument in connect() function does not work here, * then, we should restore the default value of this attribute. */ sock_set_sndtimeo(sock->sk, 5); - ret = sock->ops->connect(sock, addr, addr_len, 0); + ret = kernel_connect(sock, addr, addr_len, 0); sock_set_sndtimeo(sock->sk, 0); return ret; } From dbee1adeb7e6d31c9afbad8e9248c15694f1cc0c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 13 Nov 2023 16:24:09 -0500 Subject: [PATCH 2/6] dlm: use fl_owner from lockd This patch is changing the fl_owner value in case of an nfs lock request to not be the pid of lockd. Instead this patch changes it to be the owner value that nfs is giving us. Currently there exists proved problems with this behaviour. One nfsd server was created to export a gfs2 filesystem mount. Two nfs clients doing a nfs mount of this export. Those two clients should conflict each other operating on the same nfs file. A small test program was written: int main(int argc, const char *argv[]) { struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 1L, .l_len = 1L, }; int fd; fd = open("filename", O_RDWR | O_CREAT, 0700); printf("try to lock...\n"); fcntl(fd, F_SETLKW, &fl); printf("locked!\n"); getc(stdin); return 0; } Running on both clients at the same time and don't interrupting by pressing any key. It will show that both clients are able to acquire the lock which shouldn't be the case. The issue is here that the fl_owner value is the same and the lock context of both clients should be separated. This patch lets lockd define how to deal with lock contexts and chose hopefully the right fl_owner value. A test after this patch was made and the locks conflicts each other which should be the case. Acked-by: Jeff Layton Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/plock.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index e6b4c1a21446..ee6e0236d4f8 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -145,6 +145,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; + op->info.owner = (__u64)(long)fl->fl_owner; /* async handling */ if (fl->fl_lmops && fl->fl_lmops->lm_grant) { op_data = kzalloc(sizeof(*op_data), GFP_NOFS); @@ -154,9 +155,6 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, goto out; } - /* fl_owner is lockd which doesn't distinguish - processes on the nfs client */ - op->info.owner = (__u64) fl->fl_pid; op_data->callback = fl->fl_lmops->lm_grant; locks_init_lock(&op_data->flc); locks_copy_lock(&op_data->flc, fl); @@ -168,8 +166,6 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); rv = FILE_LOCK_DEFERRED; goto out; - } else { - op->info.owner = (__u64)(long) fl->fl_owner; } send_op(op); @@ -326,10 +322,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->lm_grant) - op->info.owner = (__u64) fl->fl_pid; - else - op->info.owner = (__u64)(long) fl->fl_owner; + op->info.owner = (__u64)(long)fl->fl_owner; if (fl->fl_flags & FL_CLOSE) { op->info.flags |= DLM_PLOCK_FL_CLOSE; @@ -389,7 +382,7 @@ int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file, info.number = number; info.start = fl->fl_start; info.end = fl->fl_end; - info.owner = (__u64)fl->fl_pid; + info.owner = (__u64)(long)fl->fl_owner; rv = do_lock_cancel(&info); switch (rv) { @@ -450,10 +443,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->lm_grant) - op->info.owner = (__u64) fl->fl_pid; - else - op->info.owner = (__u64)(long) fl->fl_owner; + op->info.owner = (__u64)(long)fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); From 6bd4a2bfe568d963af721cc5efa52091bf1a3746 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 13 Nov 2023 16:24:10 -0500 Subject: [PATCH 3/6] dlm: use FL_SLEEP to determine blocking vs non-blocking This patch uses the FL_SLEEP flag in struct file_lock to determine if the lock request is a blocking or non-blocking request. Before dlm was using IS_SETLKW() was being used which is not usable for lock requests coming from lockd when EXPORT_OP_SAFE_ASYNC_LOCK inside the export flags is set. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/plock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index ee6e0236d4f8..d814c5121367 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -140,7 +140,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.optype = DLM_PLOCK_OP_LOCK; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); - op->info.wait = IS_SETLKW(cmd); + op->info.wait = !!(fl->fl_flags & FL_SLEEP); op->info.fsid = ls->ls_global_id; op->info.number = number; op->info.start = fl->fl_start; From 0c08699744d20ce0bac22b9f291a646a0302e51f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 13 Nov 2023 16:24:11 -0500 Subject: [PATCH 4/6] dlm: implement EXPORT_OP_ASYNC_LOCK This patch is activating the EXPORT_OP_ASYNC_LOCK export flag to signal lockd that both filesystems are able to handle async lock requests. The cluster filesystems gfs2 and ocfs2 will redirect their lock requests to DLMs plock implementation that can handle async lock requests. Reviewed-by: Jeff Layton Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/gfs2/export.c | 1 + fs/ocfs2/export.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index cf40895233f5..ef1013eff936 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -192,5 +192,6 @@ const struct export_operations gfs2_export_ops = { .fh_to_parent = gfs2_fh_to_parent, .get_name = gfs2_get_name, .get_parent = gfs2_get_parent, + .flags = EXPORT_OP_ASYNC_LOCK, }; diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index eaa8c80ace3c..b8b6a191b5cb 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -280,4 +280,5 @@ const struct export_operations ocfs2_export_ops = { .fh_to_dentry = ocfs2_fh_to_dentry, .fh_to_parent = ocfs2_fh_to_parent, .get_parent = ocfs2_get_parent, + .flags = EXPORT_OP_ASYNC_LOCK, }; From 367e753d5c54a414d82610eb709fe71fda6cf1c3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 20 Dec 2023 14:38:58 -0500 Subject: [PATCH 5/6] dlm: fix format seq ops type 4 This patch fixes to set the type 4 format ops in case of table_open4(). It got accidentially changed by commit 541adb0d4d10 ("fs: dlm: debugfs for queued callbacks") and since them toss debug dumps the same format as format 5 that are the queued ast callbacks for lkbs. Fixes: 541adb0d4d10 ("fs: dlm: debugfs for queued callbacks") Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/debug_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 42f332f46359..c587bfadeff4 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -748,7 +748,7 @@ static int table_open4(struct inode *inode, struct file *file) struct seq_file *seq; int ret; - ret = seq_open(file, &format5_seq_ops); + ret = seq_open(file, &format4_seq_ops); if (ret) return ret; From 5beebc1dda47719dac85830c53bca1a0ab497d96 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 20 Dec 2023 14:38:59 -0500 Subject: [PATCH 6/6] dlm: update format header reflect current format Over the time the dlm debugfs format string has been changed but the header wasn't updated. This patch changes the first line dump header and their meaning to reflect the current formats. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/debug_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index c587bfadeff4..4fa11d9ddbb6 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -443,14 +443,14 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) break; case 3: if (ri->header) { - seq_puts(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n"); + seq_puts(seq, "rsb ptr nodeid first_lkid flags !root_list_empty !recover_list_empty recover_locks_count len\n"); ri->header = 0; } print_format3(ri->rsb, seq); break; case 4: if (ri->header) { - seq_puts(seq, "version 4 rsb 2\n"); + seq_puts(seq, "rsb ptr nodeid master_nodeid dir_nodeid our_nodeid toss_time flags len str|hex name\n"); ri->header = 0; } print_format4(ri->rsb, seq);