mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 18:56:24 +00:00
xprtrdma: Provide a buffer to pad Write chunks of unaligned length
This is a buffer to be left persistently registered while a connection is up. Connection tear-down will automatically DMA-unmap, invalidate, and dereg the MR. A persistently registered buffer is lower in cost to provide, and it can never be coalesced into the RDMA segment that carries the data payload. An RPC that provisions a Write chunk with a non-aligned length now uses this MR rather than the tail buffer of the RPC's rq_rcv_buf. Reviewed-by: Tom Talpey <tom@talpey.com> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
parent
d5f458a979
commit
21037b8c22
@ -375,10 +375,16 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class,
|
||||
|
||||
TP_fast_assign(
|
||||
const struct rpcrdma_req *req = mr->mr_req;
|
||||
const struct rpc_task *task = req->rl_slot.rq_task;
|
||||
|
||||
__entry->task_id = task->tk_pid;
|
||||
__entry->client_id = task->tk_client->cl_clid;
|
||||
if (req) {
|
||||
const struct rpc_task *task = req->rl_slot.rq_task;
|
||||
|
||||
__entry->task_id = task->tk_pid;
|
||||
__entry->client_id = task->tk_client->cl_clid;
|
||||
} else {
|
||||
__entry->task_id = 0;
|
||||
__entry->client_id = -1;
|
||||
}
|
||||
__entry->mr_id = mr->mr_ibmr->res.id;
|
||||
__entry->nents = mr->mr_nents;
|
||||
__entry->handle = mr->mr_handle;
|
||||
@ -639,6 +645,7 @@ TRACE_EVENT(xprtrdma_nomrs_err,
|
||||
DEFINE_RDCH_EVENT(read);
|
||||
DEFINE_WRCH_EVENT(write);
|
||||
DEFINE_WRCH_EVENT(reply);
|
||||
DEFINE_WRCH_EVENT(wp);
|
||||
|
||||
TRACE_DEFINE_ENUM(rpcrdma_noch);
|
||||
TRACE_DEFINE_ENUM(rpcrdma_noch_pullup);
|
||||
|
@ -666,3 +666,38 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
|
||||
*/
|
||||
rpcrdma_force_disconnect(ep);
|
||||
}
|
||||
|
||||
/**
|
||||
* frwr_wp_create - Create an MR for padding Write chunks
|
||||
* @r_xprt: transport resources to use
|
||||
*
|
||||
* Return 0 on success, negative errno on failure.
|
||||
*/
|
||||
int frwr_wp_create(struct rpcrdma_xprt *r_xprt)
|
||||
{
|
||||
struct rpcrdma_ep *ep = r_xprt->rx_ep;
|
||||
struct rpcrdma_mr_seg seg;
|
||||
struct rpcrdma_mr *mr;
|
||||
|
||||
mr = rpcrdma_mr_get(r_xprt);
|
||||
if (!mr)
|
||||
return -EAGAIN;
|
||||
mr->mr_req = NULL;
|
||||
ep->re_write_pad_mr = mr;
|
||||
|
||||
seg.mr_len = XDR_UNIT;
|
||||
seg.mr_page = virt_to_page(ep->re_write_pad);
|
||||
seg.mr_offset = offset_in_page(ep->re_write_pad);
|
||||
if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr)))
|
||||
return -EIO;
|
||||
trace_xprtrdma_mr_fastreg(mr);
|
||||
|
||||
mr->mr_cqe.done = frwr_wc_fastreg;
|
||||
mr->mr_regwr.wr.next = NULL;
|
||||
mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
|
||||
mr->mr_regwr.wr.num_sge = 0;
|
||||
mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
|
||||
mr->mr_regwr.wr.send_flags = 0;
|
||||
|
||||
return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL);
|
||||
}
|
||||
|
@ -255,15 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
|
||||
page_base = 0;
|
||||
}
|
||||
|
||||
if (type == rpcrdma_readch)
|
||||
goto out;
|
||||
|
||||
/* When encoding a Write chunk, some servers need to see an
|
||||
* extra segment for non-XDR-aligned Write chunks. The upper
|
||||
* layer provides space in the tail iovec that may be used
|
||||
* for this purpose.
|
||||
*/
|
||||
if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
|
||||
if (type == rpcrdma_readch || type == rpcrdma_writech)
|
||||
goto out;
|
||||
|
||||
if (xdrbuf->tail[0].iov_len)
|
||||
@ -405,6 +397,7 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
|
||||
enum rpcrdma_chunktype wtype)
|
||||
{
|
||||
struct xdr_stream *xdr = &req->rl_stream;
|
||||
struct rpcrdma_ep *ep = r_xprt->rx_ep;
|
||||
struct rpcrdma_mr_seg *seg;
|
||||
struct rpcrdma_mr *mr;
|
||||
int nsegs, nchunks;
|
||||
@ -443,6 +436,18 @@ static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
|
||||
nsegs -= mr->mr_nents;
|
||||
} while (nsegs);
|
||||
|
||||
if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
|
||||
if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
|
||||
return -EMSGSIZE;
|
||||
|
||||
trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
|
||||
nsegs);
|
||||
r_xprt->rx_stats.write_chunk_count++;
|
||||
r_xprt->rx_stats.total_rdma_request += mr->mr_length;
|
||||
nchunks++;
|
||||
nsegs -= mr->mr_nents;
|
||||
}
|
||||
|
||||
/* Update count of segments in this Write chunk */
|
||||
*segcount = cpu_to_be32(nchunks);
|
||||
|
||||
|
@ -551,6 +551,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
|
||||
goto out;
|
||||
}
|
||||
rpcrdma_mrs_create(r_xprt);
|
||||
frwr_wp_create(r_xprt);
|
||||
|
||||
out:
|
||||
trace_xprtrdma_connect(r_xprt, rc);
|
||||
|
@ -68,12 +68,14 @@
|
||||
/*
|
||||
* RDMA Endpoint -- connection endpoint details
|
||||
*/
|
||||
struct rpcrdma_mr;
|
||||
struct rpcrdma_ep {
|
||||
struct kref re_kref;
|
||||
struct rdma_cm_id *re_id;
|
||||
struct ib_pd *re_pd;
|
||||
unsigned int re_max_rdma_segs;
|
||||
unsigned int re_max_fr_depth;
|
||||
struct rpcrdma_mr *re_write_pad_mr;
|
||||
bool re_implicit_roundup;
|
||||
enum ib_mr_type re_mrtype;
|
||||
struct completion re_done;
|
||||
@ -97,6 +99,8 @@ struct rpcrdma_ep {
|
||||
unsigned int re_inline_recv; /* negotiated */
|
||||
|
||||
atomic_t re_completion_ids;
|
||||
|
||||
char re_write_pad[XDR_UNIT];
|
||||
};
|
||||
|
||||
/* Pre-allocate extra Work Requests for handling reverse-direction
|
||||
@ -535,6 +539,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
|
||||
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
|
||||
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
|
||||
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
|
||||
int frwr_wp_create(struct rpcrdma_xprt *r_xprt);
|
||||
|
||||
/*
|
||||
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user