mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
632344b9ef
[ Upstream commit a848c29e34
] On the node of an NFS client, some files saved in the mountpoint of the NFS server were copied to another location of the same NFS server. Accidentally, the nfs42_complete_copies() got a NULL-pointer dereference crash with the following syslog: [232064.838881] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116 [232064.839360] NFSv4: state recovery failed for open file nfs/pvc-12b5200d-cd0f-46a3-b9f0-af8f4fe0ef64.qcow2, error = -116 [232066.588183] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058 [232066.588586] Mem abort info: [232066.588701] ESR = 0x0000000096000007 [232066.588862] EC = 0x25: DABT (current EL), IL = 32 bits [232066.589084] SET = 0, FnV = 0 [232066.589216] EA = 0, S1PTW = 0 [232066.589340] FSC = 0x07: level 3 translation fault [232066.589559] Data abort info: [232066.589683] ISV = 0, ISS = 0x00000007 [232066.589842] CM = 0, WnR = 0 [232066.589967] user pgtable: 64k pages, 48-bit VAs, pgdp=00002000956ff400 [232066.590231] [0000000000000058] pgd=08001100ae100003, p4d=08001100ae100003, pud=08001100ae100003, pmd=08001100b3c00003, pte=0000000000000000 [232066.590757] Internal error: Oops: 96000007 [#1] SMP [232066.590958] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm vhost_net vhost vhost_iotlb tap tun ipt_rpfilter xt_multiport ip_set_hash_ip ip_set_hash_net xfrm_interface xfrm6_tunnel tunnel4 tunnel6 esp4 ah4 wireguard libcurve25519_generic veth xt_addrtype xt_set nf_conntrack_netlink ip_set_hash_ipportnet ip_set_hash_ipportip ip_set_bitmap_port ip_set_hash_ipport dummy ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs iptable_filter sch_ingress nfnetlink_cttimeout vport_gre ip_gre ip_tunnel gre vport_geneve geneve vport_vxlan vxlan ip6_udp_tunnel udp_tunnel openvswitch nf_conncount dm_round_robin dm_service_time dm_multipath xt_nat xt_MASQUERADE nft_chain_nat nf_nat xt_mark 
xt_conntrack xt_comment nft_compat nft_counter nf_tables nfnetlink ocfs2 ocfs2_nodemanager ocfs2_stackglue iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ipmi_ssif nbd overlay 8021q garp mrp bonding tls rfkill sunrpc ext4 mbcache jbd2 [232066.591052] vfat fat cas_cache cas_disk ses enclosure scsi_transport_sas sg acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler ip_tables vfio_pci vfio_pci_core vfio_virqfd vfio_iommu_type1 vfio dm_mirror dm_region_hash dm_log dm_mod nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc fuse xfs libcrc32c ast drm_vram_helper qla2xxx drm_kms_helper syscopyarea crct10dif_ce sysfillrect ghash_ce sysimgblt sha2_ce fb_sys_fops cec sha256_arm64 sha1_ce drm_ttm_helper ttm nvme_fc igb sbsa_gwdt nvme_fabrics drm nvme_core i2c_algo_bit i40e scsi_transport_fc megaraid_sas aes_neon_bs [232066.596953] CPU: 6 PID: 4124696 Comm: 10.253.166.125- Kdump: loaded Not tainted 5.15.131-9.cl9_ocfs2.aarch64 #1 [232066.597356] Hardware name: Great Wall .\x93\x8e...RF6260 V5/GWMSSE2GL1T, BIOS T656FBE_V3.0.18 2024-01-06 [232066.597721] pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [232066.598034] pc : nfs4_reclaim_open_state+0x220/0x800 [nfsv4] [232066.598327] lr : nfs4_reclaim_open_state+0x12c/0x800 [nfsv4] [232066.598595] sp : ffff8000f568fc70 [232066.598731] x29: ffff8000f568fc70 x28: 0000000000001000 x27: ffff21003db33000 [232066.599030] x26: ffff800005521ae0 x25: ffff0100f98fa3f0 x24: 0000000000000001 [232066.599319] x23: ffff800009920008 x22: ffff21003db33040 x21: ffff21003db33050 [232066.599628] x20: ffff410172fe9e40 x19: ffff410172fe9e00 x18: 0000000000000000 [232066.599914] x17: 0000000000000000 x16: 0000000000000004 x15: 0000000000000000 [232066.600195] x14: 0000000000000000 x13: ffff800008e685a8 x12: 00000000eac0c6e6 [232066.600498] x11: 0000000000000000 x10: 0000000000000008 x9 : ffff8000054e5828 [232066.600784] x8 : 00000000ffffffbf x7 : 0000000000000001 x6 : 000000000a9eb14a [232066.601062] x5 : 
0000000000000000 x4 : ffff70ff8a14a800 x3 : 0000000000000058 [232066.601348] x2 : 0000000000000001 x1 : 54dce46366daa6c6 x0 : 0000000000000000 [232066.601636] Call trace: [232066.601749] nfs4_reclaim_open_state+0x220/0x800 [nfsv4] [232066.601998] nfs4_do_reclaim+0x1b8/0x28c [nfsv4] [232066.602218] nfs4_state_manager+0x928/0x10f0 [nfsv4] [232066.602455] nfs4_run_state_manager+0x78/0x1b0 [nfsv4] [232066.602690] kthread+0x110/0x114 [232066.602830] ret_from_fork+0x10/0x20 [232066.602985] Code: 1400000d f9403f20 f9402e61 91016003 (f9402c00) [232066.603284] SMP: stopping secondary CPUs [232066.606936] Starting crashdump kernel... [232066.607146] Bye! Analysing the vmcore, we know that nfs4_copy_state listed by destination nfs_server->ss_copies was added by the field copies in handle_async_copy(), and we found a waiting copy process with the stack as: PID: 3511963 TASK: ffff710028b47e00 CPU: 0 COMMAND: "cp" #0 [ffff8001116ef740] __switch_to at ffff8000081b92f4 #1 [ffff8001116ef760] __schedule at ffff800008dd0650 #2 [ffff8001116ef7c0] schedule at ffff800008dd0a00 #3 [ffff8001116ef7e0] schedule_timeout at ffff800008dd6aa0 #4 [ffff8001116ef860] __wait_for_common at ffff800008dd166c #5 [ffff8001116ef8e0] wait_for_completion_interruptible at ffff800008dd1898 #6 [ffff8001116ef8f0] handle_async_copy at ffff8000055142f4 [nfsv4] #7 [ffff8001116ef970] _nfs42_proc_copy at ffff8000055147c8 [nfsv4] #8 [ffff8001116efa80] nfs42_proc_copy at ffff800005514cf0 [nfsv4] #9 [ffff8001116efc50] __nfs4_copy_file_range.constprop.0 at ffff8000054ed694 [nfsv4] The NULL-pointer dereference was due to nfs42_complete_copies() listed the nfs_server->ss_copies by the field ss_copies of nfs4_copy_state. So the nfs4_copy_state address ffff0100f98fa3f0 was offset by 0x10 and the data accessed through this pointer was also incorrect. Generally, the ordered list nfs4_state_owner->so_states indicate open(O_RDWR) or open(O_WRITE) states are reclaimed firstly by nfs4_reclaim_open_state(). 
When destination state reclaim is failed with NFS_STATE_RECOVERY_FAILED and copies are not deleted in nfs_server->ss_copies, the source state may be passed to the nfs42_complete_copies() process earlier, resulting in this crash scene finally. To solve this issue, we add a list_head nfs_server->ss_src_copies for a server-to-server copy specially. Fixes: 0e65a32c8a
("NFS: handle source server reboot") Signed-off-by: Yanjun Zhang <zhangyanjun@cestc.cn> Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
294 lines
10 KiB
C
294 lines
10 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _NFS_FS_SB
|
|
#define _NFS_FS_SB
|
|
|
|
#include <linux/list.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/nfs_xdr.h>
|
|
#include <linux/sunrpc/xprt.h>
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/refcount.h>
|
|
|
|
struct nfs4_session;
|
|
struct nfs_iostats;
|
|
struct nlm_host;
|
|
struct nfs4_sequence_args;
|
|
struct nfs4_sequence_res;
|
|
struct nfs_server;
|
|
struct nfs4_minor_version_ops;
|
|
struct nfs41_server_scope;
|
|
struct nfs41_impl_id;
|
|
|
|
/*
 * The nfs_client identifies our client state to the server.
 *
 * Several members are immediately followed by #define lines that name
 * the values or flag numbers used with that member. Those macros are
 * ordinary file-scope preprocessor definitions even though they are
 * written inside the struct body.
 *
 * Everything between "#if IS_ENABLED(CONFIG_NFS_V4)" and its matching
 * #endif exists only when NFSv4 support is configured, and
 * cl_lock_waitq additionally requires CONFIG_NFS_V4_1, so the layout of
 * this struct depends on the kernel configuration.
 */
|
|
struct nfs_client {
|
|
refcount_t cl_count;
|
|
atomic_t cl_mds_count;
|
|
int cl_cons_state; /* current construction state (-ve: init error) */
|
|
/* Non-negative values of cl_cons_state: */
#define NFS_CS_READY 0 /* ready to be used */
|
|
#define NFS_CS_INITING 1 /* busy initialising */
|
|
#define NFS_CS_SESSION_INITING 2 /* busy initialising session */
|
|
unsigned long cl_res_state; /* NFS resources state */
|
|
/* NOTE(review): the consecutive values 1..5 look like bit numbers within
 * cl_res_state rather than masks - confirm against the users. */
#define NFS_CS_CALLBACK 1 /* - callback started */
|
|
#define NFS_CS_IDMAP 2 /* - idmap started */
|
|
#define NFS_CS_RENEWD 3 /* - renewd started */
|
|
#define NFS_CS_STOP_RENEW 4 /* no more state to renew */
|
|
#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */
|
|
unsigned long cl_flags; /* behavior switches */
|
|
/* NOTE(review): the consecutive values 0..9 look like bit numbers within
 * cl_flags rather than masks - confirm against the users. */
#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
|
|
#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
|
|
#define NFS_CS_MIGRATION 2 /* - transparent state migr */
|
|
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
|
|
#define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */
|
|
#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */
|
|
#define NFS_CS_NOPING 6 /* - don't ping on connect */
|
|
#define NFS_CS_DS 7 /* - Server is a DS */
|
|
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
|
|
#define NFS_CS_PNFS 9 /* - Server used for pnfs */
|
|
struct sockaddr_storage cl_addr; /* server identifier */
|
|
/* NOTE(review): presumably the number of valid bytes in cl_addr - confirm. */
size_t cl_addrlen;
|
|
char * cl_hostname; /* hostname of server */
|
|
char * cl_acceptor; /* GSSAPI acceptor name */
|
|
struct list_head cl_share_link; /* link in global client list */
|
|
struct list_head cl_superblocks; /* List of nfs_server structs */
|
|
|
|
struct rpc_clnt * cl_rpcclient;
|
|
const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */
|
|
int cl_proto; /* Network transport protocol */
|
|
struct nfs_subversion * cl_nfs_mod; /* pointer to nfs version module */
|
|
|
|
u32 cl_minorversion;/* NFSv4 minorversion */
|
|
unsigned int cl_nconnect; /* Number of connections */
|
|
unsigned int cl_max_connect; /* max number of xprts allowed */
|
|
const char * cl_principal; /* used for machine cred */
|
|
|
|
/* Members from here to the matching #endif exist only with NFSv4. */
#if IS_ENABLED(CONFIG_NFS_V4)
|
|
struct list_head cl_ds_clients; /* auth flavor data servers */
|
|
u64 cl_clientid; /* constant */
|
|
nfs4_verifier cl_confirm; /* Clientid verifier */
|
|
unsigned long cl_state;
|
|
|
|
spinlock_t cl_lock;
|
|
|
|
unsigned long cl_lease_time;
|
|
unsigned long cl_last_renewal;
|
|
struct delayed_work cl_renewd;
|
|
|
|
struct rpc_wait_queue cl_rpcwaitq;
|
|
|
|
/* idmapper */
|
|
struct idmap * cl_idmap;
|
|
|
|
/* Client owner identifier */
|
|
const char * cl_owner_id;
|
|
|
|
u32 cl_cb_ident; /* v4.0 callback identifier */
|
|
const struct nfs4_minor_version_ops *cl_mvops;
|
|
unsigned long cl_mig_gen;
|
|
|
|
/* NFSv4.0 transport blocking */
|
|
struct nfs4_slot_table *cl_slot_tbl;
|
|
|
|
/* The sequence id to use for the next CREATE_SESSION */
|
|
u32 cl_seqid;
|
|
/* The flags used for obtaining the clientid during EXCHANGE_ID */
|
|
u32 cl_exchange_flags;
|
|
struct nfs4_session *cl_session; /* shared session */
|
|
bool cl_preserve_clid;
|
|
struct nfs41_server_owner *cl_serverowner;
|
|
struct nfs41_server_scope *cl_serverscope;
|
|
struct nfs41_impl_id *cl_implid;
|
|
/* nfs 4.1+ state protection modes: */
|
|
unsigned long cl_sp4_flags;
|
|
/* NOTE(review): the consecutive values 1..7 look like bit numbers within
 * cl_sp4_flags rather than masks - confirm against the users. */
#define NFS_SP4_MACH_CRED_MINIMAL 1 /* Minimal sp4_mach_cred - state ops
|
|
* must use machine cred */
|
|
#define NFS_SP4_MACH_CRED_CLEANUP 2 /* CLOSE and LOCKU */
|
|
#define NFS_SP4_MACH_CRED_SECINFO 3 /* SECINFO and SECINFO_NO_NAME */
|
|
#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */
|
|
#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */
|
|
#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */
|
|
#define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */
|
|
/* Present only when both CONFIG_NFS_V4 and CONFIG_NFS_V4_1 are enabled. */
#if IS_ENABLED(CONFIG_NFS_V4_1)
|
|
wait_queue_head_t cl_lock_waitq;
|
|
#endif /* CONFIG_NFS_V4_1 */
|
|
#endif /* CONFIG_NFS_V4 */
|
|
|
|
/* Our own IP address, as a null-terminated string.
|
|
* This is used to generate the mv0 callback address.
|
|
*/
|
|
/* Fixed 48-byte buffer; the terminating NUL must fit within it. */
char cl_ipaddr[48];
|
|
struct net *cl_net;
|
|
/* NOTE(review): list head; judging by the name it collects callback
 * stateids that are still pending - confirm against the callback code. */
struct list_head pending_cb_stateids;
|
|
};
|
|
|
|
/*
 * NFS client parameters stored in the superblock.
 *
 * One nfs_server points at a shared nfs_client (see the nfs_client
 * member) and is chained to the other nfs_server structs using the same
 * client through client_link.
 *
 * Several members are followed by #define lines naming the flag values
 * used with them; those macros are ordinary file-scope preprocessor
 * definitions even though they are written inside the struct body.
 *
 * The layout depends on the kernel configuration: some members are
 * guarded by CONFIG_NFS_V4_2, CONFIG_NFS_FSCACHE or
 * IS_ENABLED(CONFIG_NFS_V4).
 */
|
|
struct nfs_server {
|
|
struct nfs_client * nfs_client; /* shared client and NFS4 state */
|
|
struct list_head client_link; /* List of other nfs_server structs
|
|
* that share the same client
|
|
*/
|
|
struct list_head master_link; /* link in master servers list */
|
|
struct rpc_clnt * client; /* RPC client handle */
|
|
struct rpc_clnt * client_acl; /* ACL RPC client handle */
|
|
struct nlm_host *nlm_host; /* NLM client handle */
|
|
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
|
|
atomic_long_t writeback; /* number of writeback pages */
|
|
unsigned int write_congested;/* flag set when writeback gets too high */
|
|
unsigned int flags; /* various flags */
|
|
|
|
/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
|
|
/* Each value below is a distinct single-bit mask, starting at 0x10000. */
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
|
|
#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
|
|
#define NFS_MOUNT_NORESVPORT 0x40000
|
|
#define NFS_MOUNT_LEGACY_INTERFACE 0x80000
|
|
#define NFS_MOUNT_LOCAL_FLOCK 0x100000
|
|
#define NFS_MOUNT_LOCAL_FCNTL 0x200000
|
|
#define NFS_MOUNT_SOFTERR 0x400000
|
|
#define NFS_MOUNT_SOFTREVAL 0x800000
|
|
#define NFS_MOUNT_WRITE_EAGER 0x01000000
|
|
#define NFS_MOUNT_WRITE_WAIT 0x02000000
|
|
#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
|
|
|
|
unsigned int fattr_valid; /* Valid attributes */
|
|
unsigned int caps; /* server capabilities */
|
|
unsigned int rsize; /* read size */
|
|
unsigned int rpages; /* read size (in pages) */
|
|
unsigned int wsize; /* write size */
|
|
unsigned int wpages; /* write size (in pages) */
|
|
unsigned int wtmult; /* server disk block size */
|
|
unsigned int dtsize; /* readdir size */
|
|
unsigned short port; /* "port=" setting */
|
|
unsigned int bsize; /* server block size */
|
|
/* Extended-attribute transfer sizes; present only with CONFIG_NFS_V4_2. */
#ifdef CONFIG_NFS_V4_2
|
|
unsigned int gxasize; /* getxattr size */
|
|
unsigned int sxasize; /* setxattr size */
|
|
unsigned int lxasize; /* listxattr size */
|
|
#endif
|
|
unsigned int acregmin; /* attr cache timeouts */
|
|
unsigned int acregmax;
|
|
unsigned int acdirmin;
|
|
unsigned int acdirmax;
|
|
unsigned int namelen;
|
|
unsigned int options; /* extra options enabled by mount */
|
|
unsigned int clone_blksize; /* granularity of a CLONE operation */
|
|
/* NOTE(review): these masks presumably apply to 'options' above, not to
 * clone_blksize, despite their position - confirm against the users. */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
|
|
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
|
|
|
|
enum nfs4_change_attr_type
|
|
change_attr_type;/* Description of change attribute */
|
|
|
|
struct nfs_fsid fsid;
|
|
__u64 maxfilesize; /* maximum file size */
|
|
struct timespec64 time_delta; /* smallest time granularity */
|
|
unsigned long mount_time; /* when this fs was mounted */
|
|
struct super_block *super; /* VFS super block */
|
|
dev_t s_dev; /* superblock dev numbers */
|
|
struct nfs_auth_info auth_info; /* parsed auth flavors */
|
|
|
|
/* Present only with CONFIG_NFS_FSCACHE. */
#ifdef CONFIG_NFS_FSCACHE
|
|
struct fscache_volume *fscache; /* superblock cookie */
|
|
char *fscache_uniq; /* Uniquifier (or NULL) */
|
|
#endif
|
|
|
|
u32 pnfs_blksize; /* layout_blksize attr */
|
|
/* Members from here to the matching #endif exist only with NFSv4. */
#if IS_ENABLED(CONFIG_NFS_V4)
|
|
u32 attr_bitmask[3];/* V4 bitmask representing the set
|
|
of attributes supported on this
|
|
filesystem */
|
|
u32 attr_bitmask_nl[3];
|
|
/* V4 bitmask representing the
|
|
set of attributes supported
|
|
on this filesystem excluding
|
|
the label support bit. */
|
|
u32 exclcreat_bitmask[3];
|
|
/* V4 bitmask representing the
|
|
set of attributes supported
|
|
on this filesystem for the
|
|
exclusive create. */
|
|
u32 cache_consistency_bitmask[3];
|
|
/* V4 bitmask representing the subset
|
|
of change attribute, size, ctime
|
|
and mtime attributes supported by
|
|
the server */
|
|
u32 acl_bitmask; /* V4 bitmask representing the ACEs
|
|
that are supported on this
|
|
filesystem */
|
|
u32 fh_expire_type; /* V4 bitmask representing file
|
|
handle volatility type for
|
|
this filesystem */
|
|
struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */
|
|
struct rpc_wait_queue roc_rpcwaitq;
|
|
void *pnfs_ld_data; /* per mount point data */
|
|
|
|
/* the following fields are protected by nfs_client->cl_lock */
|
|
struct rb_root state_owners;
|
|
#endif
|
|
struct ida openowner_id;
|
|
struct ida lockowner_id;
|
|
struct list_head state_owners_lru;
|
|
struct list_head layouts;
|
|
struct list_head delegations;
|
|
/*
 * Two separate lists of copy-state objects. ss_src_copies exists
 * specifically for server-to-server copies so that those entries do
 * not share a list head with the entries queued on ss_copies: walking
 * one list with the list member used for the other yields a
 * mis-offset entry pointer.
 * NOTE(review): which member of the copy-state object links onto
 * which list is not visible in this header - confirm in the NFSv4.2
 * copy code before touching either list.
 */
struct list_head ss_copies;
|
|
struct list_head ss_src_copies;
|
|
|
|
unsigned long mig_gen;
|
|
unsigned long mig_status;
|
|
/* NOTE(review): the consecutive values 1..3 look like bit numbers within
 * mig_status rather than masks - confirm against the users. */
#define NFS_MIG_IN_TRANSITION (1)
|
|
#define NFS_MIG_FAILED (2)
|
|
#define NFS_MIG_TSM_POSSIBLE (3)
|
|
|
|
/* Callback taking the nfs_server itself; the name suggests teardown. */
void (*destroy)(struct nfs_server *);
|
|
|
|
atomic_t active; /* Keep track of any activity to this server */
|
|
|
|
/* mountd-related mount options */
|
|
struct sockaddr_storage mountd_address;
|
|
size_t mountd_addrlen;
|
|
u32 mountd_version;
|
|
unsigned short mountd_port;
|
|
unsigned short mountd_protocol;
|
|
struct rpc_wait_queue uoc_rpcwaitq;
|
|
|
|
/* XDR related information */
|
|
unsigned int read_hdrsize;
|
|
|
|
/* User namespace info */
|
|
const struct cred *cred;
|
|
bool has_sec_mnt_opts;
|
|
};
|
|
|
|
/*
 * Server capabilities
 *
 * Single-bit masks, each written as (1U << n). Bits 0-7 and 14-31 are
 * assigned below; bits 8-13 are not defined in this list. The unsigned
 * literal keeps the shift into bit 31 (NFS_CAP_MOVEABLE) well defined.
 * NOTE(review): presumably these are tested against the 'caps' member
 * of struct nfs_server - confirm at the call sites.
 */
|
|
#define NFS_CAP_READDIRPLUS (1U << 0)
|
|
#define NFS_CAP_HARDLINKS (1U << 1)
|
|
#define NFS_CAP_SYMLINKS (1U << 2)
|
|
#define NFS_CAP_ACLS (1U << 3)
|
|
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
|
|
#define NFS_CAP_LGOPEN (1U << 5)
|
|
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
|
|
#define NFS_CAP_CASE_PRESERVING (1U << 7)
|
|
/* Bits 8-13 are skipped here. */
#define NFS_CAP_POSIX_LOCK (1U << 14)
|
|
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
|
|
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
|
|
#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17)
|
|
#define NFS_CAP_SECURITY_LABEL (1U << 18)
|
|
#define NFS_CAP_SEEK (1U << 19)
|
|
#define NFS_CAP_ALLOCATE (1U << 20)
|
|
#define NFS_CAP_DEALLOCATE (1U << 21)
|
|
#define NFS_CAP_LAYOUTSTATS (1U << 22)
|
|
#define NFS_CAP_CLONE (1U << 23)
|
|
#define NFS_CAP_COPY (1U << 24)
|
|
#define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
|
|
#define NFS_CAP_LAYOUTERROR (1U << 26)
|
|
#define NFS_CAP_COPY_NOTIFY (1U << 27)
|
|
#define NFS_CAP_XATTR (1U << 28)
|
|
#define NFS_CAP_READ_PLUS (1U << 29)
|
|
#define NFS_CAP_FS_LOCATIONS (1U << 30)
|
|
/* Highest bit of a 32-bit unsigned value. */
#define NFS_CAP_MOVEABLE (1U << 31)
|
|
#endif
|