NVMe: fix retry/error logic in nvme_queue_rq()

The logic around retrying and erroring IO in nvme_queue_rq() is broken
in a few ways:

- If we fail allocating dma memory for a discard, we return retry. We
  have the 'iod' stored in ->special, but we free the 'iod', so the
  requeued request would pick up a stale pointer to freed memory.

- For a normal request, if we fail dma mapping or setting up prps, we
  have the same iod situation. Additionally, we haven't set the callback
  for the request yet, so we also potentially leak IOMMU resources.

Get rid of the ->special 'iod' store. The retry is uncommon enough that
it's not worth optimizing for or holding on to resources to attempt to
speed it up. Additionally, it's usually best practice to free any
request related resources when doing retries.

Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Jens Axboe 2014-12-11 13:58:39 -07:00
parent 285dffc910
commit fe54303ee2

View File

@ -621,24 +621,15 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_iod *iod; struct nvme_iod *iod;
int psegs = req->nr_phys_segments; int psegs = req->nr_phys_segments;
int result = BLK_MQ_RQ_QUEUE_BUSY;
enum dma_data_direction dma_dir; enum dma_data_direction dma_dir;
unsigned size = !(req->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(req) : unsigned size = !(req->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(req) :
sizeof(struct nvme_dsm_range); sizeof(struct nvme_dsm_range);
/*
* Requeued IO has already been prepped
*/
iod = req->special;
if (iod)
goto submit_iod;
iod = nvme_alloc_iod(psegs, size, ns->dev, GFP_ATOMIC); iod = nvme_alloc_iod(psegs, size, ns->dev, GFP_ATOMIC);
if (!iod) if (!iod)
return result; return BLK_MQ_RQ_QUEUE_BUSY;
iod->private = req; iod->private = req;
req->special = iod;
if (req->cmd_flags & REQ_DISCARD) { if (req->cmd_flags & REQ_DISCARD) {
void *range; void *range;
@ -651,7 +642,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
GFP_ATOMIC, GFP_ATOMIC,
&iod->first_dma); &iod->first_dma);
if (!range) if (!range)
goto finish_cmd; goto retry_cmd;
iod_list(iod)[0] = (__le64 *)range; iod_list(iod)[0] = (__le64 *)range;
iod->npages = 0; iod->npages = 0;
} else if (psegs) { } else if (psegs) {
@ -659,22 +650,22 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
sg_init_table(iod->sg, psegs); sg_init_table(iod->sg, psegs);
iod->nents = blk_rq_map_sg(req->q, req, iod->sg); iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
if (!iod->nents) { if (!iod->nents)
result = BLK_MQ_RQ_QUEUE_ERROR; goto error_cmd;
goto finish_cmd;
}
if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir)) if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
goto finish_cmd; goto retry_cmd;
if (blk_rq_bytes(req) != nvme_setup_prps(nvmeq->dev, iod, if (blk_rq_bytes(req) !=
blk_rq_bytes(req), GFP_ATOMIC)) nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
goto finish_cmd; dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg,
iod->nents, dma_dir);
goto retry_cmd;
}
} }
blk_mq_start_request(req); blk_mq_start_request(req);
submit_iod:
nvme_set_info(cmd, iod, req_completion); nvme_set_info(cmd, iod, req_completion);
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
if (req->cmd_flags & REQ_DISCARD) if (req->cmd_flags & REQ_DISCARD)
@ -688,10 +679,12 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
return BLK_MQ_RQ_QUEUE_OK; return BLK_MQ_RQ_QUEUE_OK;
finish_cmd: error_cmd:
nvme_finish_cmd(nvmeq, req->tag, NULL);
nvme_free_iod(nvmeq->dev, iod); nvme_free_iod(nvmeq->dev, iod);
return result; return BLK_MQ_RQ_QUEUE_ERROR;
retry_cmd:
nvme_free_iod(nvmeq->dev, iod);
return BLK_MQ_RQ_QUEUE_BUSY;
} }
static int nvme_process_cq(struct nvme_queue *nvmeq) static int nvme_process_cq(struct nvme_queue *nvmeq)