SUNRPC: close a rare race in xs_tcp_setup_socket.

We have one report of a crash in xs_tcp_setup_socket.
The call path to the crash is:

  xs_tcp_setup_socket -> inet_stream_connect -> lock_sock_nested.

The 'sock' passed to that last function is NULL.

The only way I can see this happening is a concurrent call to
xs_close:

  xs_close -> xs_reset_transport -> sock_release -> inet_release

inet_release sets:
   sock->sk = NULL;
inet_stream_connect calls
   lock_sock(sock->sk);
which gets NULL.

All calls to xs_close are protected by XPRT_LOCKED as are most
activations of the workqueue which runs xs_tcp_setup_socket.
The exception is xs_tcp_schedule_linger_timeout.

So presumably the timeout queued by the later fires exactly when some
other code runs xs_close().

To protect against this we can move the cancel_delayed_work_sync()
call from xs_destory() to xs_close().

As xs_close is never called from the worker scheduled on
->connect_worker, this can never deadlock.

Signed-off-by: NeilBrown <neilb@suse.de>
[Trond: Make it safe to call cancel_delayed_work_sync() on AF_LOCAL sockets]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
This commit is contained in:
NeilBrown 2013-10-31 16:14:36 +11:00 committed by Trond Myklebust
parent 09c3e54635
commit 93dc41bdc5

View File

@ -835,6 +835,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt); dprintk("RPC: xs_close xprt %p\n", xprt);
cancel_delayed_work_sync(&transport->connect_worker);
xs_reset_transport(transport); xs_reset_transport(transport);
xprt->reestablish_timeout = 0; xprt->reestablish_timeout = 0;
@ -869,12 +871,8 @@ static void xs_local_destroy(struct rpc_xprt *xprt)
*/ */
static void xs_destroy(struct rpc_xprt *xprt) static void xs_destroy(struct rpc_xprt *xprt)
{ {
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
dprintk("RPC: xs_destroy xprt %p\n", xprt); dprintk("RPC: xs_destroy xprt %p\n", xprt);
cancel_delayed_work_sync(&transport->connect_worker);
xs_local_destroy(xprt); xs_local_destroy(xprt);
} }
@ -1817,6 +1815,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
} }
#endif #endif
static void xs_dummy_setup_socket(struct work_struct *work)
{
}
static struct socket *xs_create_sock(struct rpc_xprt *xprt, static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type, int protocol) struct sock_xprt *transport, int family, int type, int protocol)
{ {
@ -2668,6 +2670,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->ops = &xs_local_ops; xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout; xprt->timeout = &xs_local_default_timeout;
INIT_DELAYED_WORK(&transport->connect_worker,
xs_dummy_setup_socket);
switch (sun->sun_family) { switch (sun->sun_family) {
case AF_LOCAL: case AF_LOCAL:
if (sun->sun_path[0] != '/') { if (sun->sun_path[0] != '/') {