RDMA/cxgb4: Fix endpoint mutex deadlocks

In cases where the cm calls c4iw_modify_rc_qp() with the endpoint
mutex held, it must be called with internal == 1.  rx_data() and
process_mpa_reply() are not doing this.  This causes a deadlock
because c4iw_modify_rc_qp() might call c4iw_ep_disconnect() in some
!internal cases, and c4iw_ep_disconnect() acquires the endpoint mutex.
The design was intended to only do the disconnect for !internal calls.

Change rx_data(), FPDU_MODE case, to call c4iw_modify_rc_qp() with
internal == 1, and then disconnect only after releasing the mutex.

Change process_mpa_reply() to call c4iw_modify_rc_qp(TERMINATE) with
internal == 1 and set a new attr flag telling it to send a TERMINATE
message.  Previously this was implied by !internal.

Change process_mpa_reply() to return whether the caller should
disconnect after releasing the endpoint mutex.  Now rx_data() will do
the disconnect in the cases where process_mpa_reply() wants to
disconnect after the TERMINATE is sent.

Change c4iw_modify_rc_qp() RTS->TERM to only disconnect if !internal,
and to send a TERMINATE message if attrs->send_term is 1.

Change abort_connection() to not acquire the ep mutex for setting the
state, and make all calls to abort_connection() do so with the mutex
held.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Steve Wise 2014-04-24 14:31:53 -05:00 committed by Roland Dreier
parent d1db0eea85
commit cc18b939e1
3 changed files with 26 additions and 15 deletions

View File

@ -996,7 +996,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
{ {
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
state_set(&ep->com, ABORTING); __state_set(&ep->com, ABORTING);
set_bit(ABORT_CONN, &ep->com.history); set_bit(ABORT_CONN, &ep->com.history);
return send_abort(ep, skb, gfp); return send_abort(ep, skb, gfp);
} }
@ -1154,7 +1154,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
return credits; return credits;
} }
static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{ {
struct mpa_message *mpa; struct mpa_message *mpa;
struct mpa_v2_conn_params *mpa_v2_params; struct mpa_v2_conn_params *mpa_v2_params;
@ -1164,6 +1164,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
struct c4iw_qp_attributes attrs; struct c4iw_qp_attributes attrs;
enum c4iw_qp_attr_mask mask; enum c4iw_qp_attr_mask mask;
int err; int err;
int disconnect = 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@ -1173,7 +1174,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* will abort the connection. * will abort the connection.
*/ */
if (stop_ep_timer(ep)) if (stop_ep_timer(ep))
return; return 0;
/* /*
* If we get more than the supported amount of private data * If we get more than the supported amount of private data
@ -1195,7 +1196,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* if we don't even have the mpa message, then bail. * if we don't even have the mpa message, then bail.
*/ */
if (ep->mpa_pkt_len < sizeof(*mpa)) if (ep->mpa_pkt_len < sizeof(*mpa))
return; return 0;
mpa = (struct mpa_message *) ep->mpa_pkt; mpa = (struct mpa_message *) ep->mpa_pkt;
/* Validate MPA header. */ /* Validate MPA header. */
@ -1235,7 +1236,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* We'll continue process when more data arrives. * We'll continue process when more data arrives.
*/ */
if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
return; return 0;
if (mpa->flags & MPA_REJECT) { if (mpa->flags & MPA_REJECT) {
err = -ECONNREFUSED; err = -ECONNREFUSED;
@ -1337,9 +1338,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR; attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE; attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM; err = -ENOMEM;
disconnect = 1;
goto out; goto out;
} }
@ -1355,9 +1358,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD; attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE; attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM; err = -ENOMEM;
disconnect = 1;
goto out; goto out;
} }
goto out; goto out;
@ -1366,7 +1371,7 @@ err:
send_abort(ep, skb, GFP_KERNEL); send_abort(ep, skb, GFP_KERNEL);
out: out:
connect_reply_upcall(ep, err); connect_reply_upcall(ep, err);
return; return disconnect;
} }
static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
@ -1524,6 +1529,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(hdr); unsigned int tid = GET_TID(hdr);
struct tid_info *t = dev->rdev.lldi.tids; struct tid_info *t = dev->rdev.lldi.tids;
__u8 status = hdr->status; __u8 status = hdr->status;
int disconnect = 0;
ep = lookup_tid(t, tid); ep = lookup_tid(t, tid);
if (!ep) if (!ep)
@ -1539,7 +1545,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
switch (ep->com.state) { switch (ep->com.state) {
case MPA_REQ_SENT: case MPA_REQ_SENT:
ep->rcv_seq += dlen; ep->rcv_seq += dlen;
process_mpa_reply(ep, skb); disconnect = process_mpa_reply(ep, skb);
break; break;
case MPA_REQ_WAIT: case MPA_REQ_WAIT:
ep->rcv_seq += dlen; ep->rcv_seq += dlen;
@ -1555,13 +1561,16 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.state, ep->hwtid, status); ep->com.state, ep->hwtid, status);
attrs.next_state = C4IW_QP_STATE_TERMINATE; attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
disconnect = 1;
break; break;
} }
default: default:
break; break;
} }
mutex_unlock(&ep->com.mutex); mutex_unlock(&ep->com.mutex);
if (disconnect)
c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
return 0; return 0;
} }
@ -3482,9 +3491,9 @@ static void process_timeout(struct c4iw_ep *ep)
__func__, ep, ep->hwtid, ep->com.state); __func__, ep, ep->hwtid, ep->com.state);
abort = 0; abort = 0;
} }
mutex_unlock(&ep->com.mutex);
if (abort) if (abort)
abort_connection(ep, NULL, GFP_KERNEL); abort_connection(ep, NULL, GFP_KERNEL);
mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com); c4iw_put_ep(&ep->com);
} }

View File

@ -435,6 +435,7 @@ struct c4iw_qp_attributes {
u8 ecode; u8 ecode;
u16 sq_db_inc; u16 sq_db_inc;
u16 rq_db_inc; u16 rq_db_inc;
u8 send_term;
}; };
struct c4iw_qp { struct c4iw_qp {

View File

@ -1388,11 +1388,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode; qhp->attr.ecode = attrs->ecode;
ep = qhp->ep; ep = qhp->ep;
disconnect = 1; if (!internal) {
c4iw_get_ep(&qhp->ep->com); c4iw_get_ep(&qhp->ep->com);
if (!internal)
terminate = 1; terminate = 1;
else { disconnect = 1;
} else {
terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep); ret = rdma_fini(rhp, qhp, ep);
if (ret) if (ret)
goto err; goto err;