diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 0715ce93daef..58fc8684788d 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE ---help--- MQ version of the deadline IO scheduler. -config MQ_IOSCHED_NONE - bool - default y - -choice - prompt "Default single-queue blk-mq I/O scheduler" - default DEFAULT_SQ_NONE - help - Select the I/O scheduler which will be used by default for blk-mq - managed block devices with a single queue. - - config DEFAULT_SQ_DEADLINE - bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y - - config DEFAULT_SQ_NONE - bool "None" - -endchoice - -config DEFAULT_SQ_IOSCHED - string - default "mq-deadline" if DEFAULT_SQ_DEADLINE - default "none" if DEFAULT_SQ_NONE - -choice - prompt "Default multi-queue blk-mq I/O scheduler" - default DEFAULT_MQ_NONE - help - Select the I/O scheduler which will be used by default for blk-mq - managed block devices with multiple queues. - - config DEFAULT_MQ_DEADLINE - bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y - - config DEFAULT_MQ_NONE - bool "None" - -endchoice - -config DEFAULT_MQ_IOSCHED - string - default "mq-deadline" if DEFAULT_MQ_DEADLINE - default "none" if DEFAULT_MQ_NONE - endmenu endif diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9e8d6795a8c1..98c7b061781e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -205,7 +205,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) * needing a restart in that case. */ if (!list_empty(&rq_list)) { - blk_mq_sched_mark_restart(hctx); + blk_mq_sched_mark_restart_hctx(hctx); did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); @@ -331,20 +331,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) { + struct request_queue *q = hctx->queue; unsigned int i; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { + if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_restart_hctx(hctx); + } + } else { blk_mq_sched_restart_hctx(hctx); - else { - struct request_queue *q = hctx->queue; - - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - return; - - clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags); - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); } } @@ -498,15 +494,6 @@ int blk_mq_sched_init(struct request_queue *q) { int ret; -#if defined(CONFIG_DEFAULT_SQ_NONE) - if (q->nr_hw_queues == 1) - return 0; -#endif -#if defined(CONFIG_DEFAULT_MQ_NONE) - if (q->nr_hw_queues > 1) - return 0; -#endif - mutex_lock(&q->sysfs_lock); ret = elevator_init(q, NULL); mutex_unlock(&q->sysfs_lock); diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 7b5f3b95c78e..a75b16b123f7 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) return false; } -static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx) +/* + * Mark a hardware queue as needing a restart. + */ +static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) { - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; +} - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); - } - } +/* + * Mark a hardware queue and the request queue it belongs to as needing a + * restart. + */ +static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; + + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) + set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); } static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq.c b/block/blk-mq.c index b29e7dc7b309..9e6b064e5339 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list) return first != NULL; } +static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags, + void *key) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); + + list_del(&wait->task_list); + clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state); + blk_mq_run_hw_queue(hctx, true); + return 1; +} + +static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) +{ + struct sbq_wait_state *ws; + + /* + * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait. + * The thread which wins the race to grab this bit adds the hardware + * queue to the wait queue. + */ + if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) || + test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state)) + return false; + + init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); + ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx); + + /* + * As soon as this returns, it's no longer safe to fiddle with + * hctx->dispatch_wait, since a completion can wake up the wait queue + * and unlock the bit. + */ + add_wait_queue(&ws->wait, &hctx->dispatch_wait); + return true; +} + bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct request_queue *q = hctx->queue; @@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) continue; /* - * We failed getting a driver tag. Mark the queue(s) - * as needing a restart. Retry getting a tag again, - * in case the needed IO completed right before we - * marked the queue as needing a restart. + * The initial allocation attempt failed, so we need to + * rerun the hardware queue when a tag is freed. */ - blk_mq_sched_mark_restart(hctx); - if (!blk_mq_get_driver_tag(rq, &hctx, false)) + if (blk_mq_dispatch_wait_add(hctx)) { + /* + * It's possible that a tag was freed in the + * window between the allocation failure and + * adding the hardware queue to the wait queue. + */ + if (!blk_mq_get_driver_tag(rq, &hctx, false)) + break; + } else { break; + } } + list_del_init(&rq->queuelist); bd.rq = rq; @@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * * blk_mq_run_hw_queue() already checks the STOPPED bit * - * If RESTART is set, then let completion restart the queue - * instead of potentially looping here. + * If RESTART or TAG_WAITING is set, then let completion restart + * the queue instead of potentially looping here. */ - if (!blk_mq_sched_needs_restart(hctx)) + if (!blk_mq_sched_needs_restart(hctx) && + !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) blk_mq_run_hw_queue(hctx, true); } diff --git a/block/elevator.c b/block/elevator.c index 699d10f71a2c..ac1c9f481a98 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name) } if (!e) { - if (q->mq_ops && q->nr_hw_queues == 1) - e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false); - else if (q->mq_ops) - e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false); - else + /* + * For blk-mq devices, we default to using mq-deadline, + * if available, for single queue devices. If deadline + * isn't available OR we have multiple queues, default + * to "none". + */ + if (q->mq_ops) { + if (q->nr_hw_queues == 1) + e = elevator_get("mq-deadline", false); + if (!e) + return 0; + } else e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ - "Using noop/none.\n"); + "Using noop.\n"); e = elevator_get("noop", false); } } diff --git a/block/genhd.c b/block/genhd.c index 3631cd480295..2f444b87a5f2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -669,14 +669,14 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - bdev_unhash_inode(MKDEV(disk->major, - disk->first_minor + part->partno)); invalidate_partition(disk, part->partno); + bdev_unhash_inode(part_devt(part)); delete_partition(disk, part->partno); } disk_part_iter_exit(&piter); invalidate_partition(disk, 0); + bdev_unhash_inode(disk_devt(disk)); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; diff --git a/block/sed-opal.c b/block/sed-opal.c index d1c52ba4d62d..1e18dca360fc 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -34,7 +34,11 @@ #define IO_BUFFER_LENGTH 2048 #define MAX_TOKS 64 -typedef int (*opal_step)(struct opal_dev *dev); +struct opal_step { + int (*fn)(struct opal_dev *dev, void *data); + void *data; +}; +typedef int (cont_fn)(struct opal_dev *dev); enum opal_atom_width { OPAL_WIDTH_TINY, @@ -80,9 +84,7 @@ struct opal_dev { void *data; sec_send_recv *send_recv; - const opal_step *funcs; - void **func_data; - int state; + const struct opal_step *steps; struct mutex dev_lock; u16 comid; u32 hsn; @@ -213,8 +215,6 @@ static const u8 opalmethod[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 }, }; -typedef int (cont_fn)(struct opal_dev *dev); - static int end_opal_session_error(struct opal_dev *dev); struct opal_suspend_data { @@ -375,18 +375,18 @@ static void check_geometry(struct opal_dev *dev, const void *data) static int next(struct opal_dev *dev) { - opal_step func; - int error = 0; + const struct opal_step *step; + int state = 0, error = 0; do { - func = dev->funcs[dev->state]; - if (!func) + step = &dev->steps[state]; + if (!step->fn) break; - error = func(dev); + error = step->fn(dev, step->data); if (error) { pr_err("Error on step function: %d with error %d: %s\n", - dev->state, error, + state, error, opal_error_to_human(error)); /* For each OPAL command we do a discovery0 then we @@ -396,10 +396,13 @@ static int next(struct opal_dev *dev) * session. Therefore we shouldn't attempt to terminate * a session, as one has not yet been created. */ - if (dev->state > 1) - return end_opal_session_error(dev); + if (state > 1) { + end_opal_session_error(dev); + return error; + } + } - dev->state++; + state++; } while (!error); return error; @@ -411,10 +414,17 @@ static int opal_discovery0_end(struct opal_dev *dev) const struct d0_header *hdr = (struct d0_header *)dev->resp; const u8 *epos = dev->resp, *cpos = dev->resp; u16 comid = 0; + u32 hlen = be32_to_cpu(hdr->length); - print_buffer(dev->resp, be32_to_cpu(hdr->length)); + print_buffer(dev->resp, hlen); - epos += be32_to_cpu(hdr->length); /* end of buffer */ + if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) { + pr_warn("Discovery length overflows buffer (%zu+%u)/%u\n", + sizeof(*hdr), hlen, IO_BUFFER_LENGTH); + return -EFAULT; + } + + epos += hlen; /* end of buffer */ cpos += sizeof(*hdr); /* current position on buffer */ while (cpos < epos && supported) { @@ -476,7 +486,7 @@ static int opal_discovery0_end(struct opal_dev *dev) return 0; } -static int opal_discovery0(struct opal_dev *dev) +static int opal_discovery0(struct opal_dev *dev, void *data) { int ret; @@ -662,52 +672,29 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) return 0; } -static enum opal_response_token token_type(const struct parsed_resp *resp, - int n) +static const struct opal_resp_tok *response_get_token( + const struct parsed_resp *resp, + int n) { const struct opal_resp_tok *tok; if (n >= resp->num) { pr_err("Token number doesn't exist: %d, resp: %d\n", n, resp->num); - return OPAL_DTA_TOKENID_INVALID; + return ERR_PTR(-EINVAL); } tok = &resp->toks[n]; if (tok->len == 0) { pr_err("Token length must be non-zero\n"); - return OPAL_DTA_TOKENID_INVALID; + return ERR_PTR(-EINVAL); } - return tok->type; + return tok; } -/* - * This function returns 0 in case of invalid token. One should call - * token_type() first to find out if the token is valid or not. - */ -static enum opal_token response_get_token(const struct parsed_resp *resp, - int n) -{ - const struct opal_resp_tok *tok; - - if (n >= resp->num) { - pr_err("Token number doesn't exist: %d, resp: %d\n", - n, resp->num); - return 0; - } - - tok = &resp->toks[n]; - if (tok->len == 0) { - pr_err("Token length must be non-zero\n"); - return 0; - } - - return tok->pos[0]; -} - -static size_t response_parse_tiny(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_tiny(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = 1; @@ -723,8 +710,8 @@ static size_t response_parse_tiny(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_short(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_short(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1; @@ -736,7 +723,7 @@ static size_t response_parse_short(struct opal_resp_tok *tok, tok->type = OPAL_DTA_TOKENID_SINT; } else { u64 u_integer = 0; - int i, b = 0; + ssize_t i, b = 0; tok->type = OPAL_DTA_TOKENID_UINT; if (tok->len > 9) { @@ -753,8 +740,8 @@ static size_t response_parse_short(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_medium(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_medium(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2; @@ -770,8 +757,8 @@ static size_t response_parse_medium(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_long(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_long(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4; @@ -787,8 +774,8 @@ static size_t response_parse_long(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_token(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_token(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = 1; @@ -805,8 +792,9 @@ static int response_parse(const u8 *buf, size_t length, struct opal_resp_tok *iter; int num_entries = 0; int total; - size_t token_length; + ssize_t token_length; const u8 *pos; + u32 clen, plen, slen; if (!buf) return -EFAULT; @@ -818,17 +806,16 @@ static int response_parse(const u8 *buf, size_t length, pos = buf; pos += sizeof(*hdr); - pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n", - be32_to_cpu(hdr->cp.length), - be32_to_cpu(hdr->pkt.length), - be32_to_cpu(hdr->subpkt.length)); + clen = be32_to_cpu(hdr->cp.length); + plen = be32_to_cpu(hdr->pkt.length); + slen = be32_to_cpu(hdr->subpkt.length); + pr_debug("Response size: cp: %u, pkt: %u, subpkt: %u\n", + clen, plen, slen); - if (hdr->cp.length == 0 || hdr->pkt.length == 0 || - hdr->subpkt.length == 0) { - pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n", - be32_to_cpu(hdr->cp.length), - be32_to_cpu(hdr->pkt.length), - be32_to_cpu(hdr->subpkt.length)); + if (clen == 0 || plen == 0 || slen == 0 || + slen > IO_BUFFER_LENGTH - sizeof(*hdr)) { + pr_err("Bad header length. cp: %u, pkt: %u, subpkt: %u\n", + clen, plen, slen); print_buffer(pos, sizeof(*hdr)); return -EINVAL; } @@ -837,7 +824,7 @@ static int response_parse(const u8 *buf, size_t length, return -EFAULT; iter = resp->toks; - total = be32_to_cpu(hdr->subpkt.length); + total = slen; print_buffer(pos, total); while (total > 0) { if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */ @@ -851,8 +838,8 @@ static int response_parse(const u8 *buf, size_t length, else /* TOKEN */ token_length = response_parse_token(iter, pos); - if (token_length == -EINVAL) - return -EINVAL; + if (token_length < 0) + return token_length; pos += token_length; total -= token_length; @@ -922,20 +909,32 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n) return resp->toks[n].stored.u; } +static bool response_token_matches(const struct opal_resp_tok *token, u8 match) +{ + if (IS_ERR(token) || + token->type != OPAL_DTA_TOKENID_TOKEN || + token->pos[0] != match) + return false; + return true; +} + static u8 response_status(const struct parsed_resp *resp) { - if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN && - response_get_token(resp, 0) == OPAL_ENDOFSESSION) { + const struct opal_resp_tok *tok; + + tok = response_get_token(resp, 0); + if (response_token_matches(tok, OPAL_ENDOFSESSION)) return 0; - } if (resp->num < 5) return DTAERROR_NO_METHOD_STATUS; - if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN || - token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN || - response_get_token(resp, resp->num - 1) != OPAL_ENDLIST || - response_get_token(resp, resp->num - 5) != OPAL_STARTLIST) + tok = response_get_token(resp, resp->num - 5); + if (!response_token_matches(tok, OPAL_STARTLIST)) + return DTAERROR_NO_METHOD_STATUS; + + tok = response_get_token(resp, resp->num - 1); + if (!response_token_matches(tok, OPAL_ENDLIST)) return DTAERROR_NO_METHOD_STATUS; return response_get_u64(resp, resp->num - 4); @@ -1022,7 +1021,7 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) return opal_send_recv(dev, cont); } -static int gen_key(struct opal_dev *dev) +static int gen_key(struct opal_dev *dev, void *data) { const u8 *method; u8 uid[OPAL_UID_LENGTH]; @@ -1076,15 +1075,14 @@ static int get_active_key_cont(struct opal_dev *dev) return 0; } -static int get_active_key(struct opal_dev *dev) +static int get_active_key(struct opal_dev *dev, void *data) { u8 uid[OPAL_UID_LENGTH]; int err = 0; - u8 *lr; + u8 *lr = data; clear_opal_cmd(dev); set_comid(dev, dev->comid); - lr = dev->func_data[dev->state]; err = build_locking_range(uid, sizeof(uid), *lr); if (err) @@ -1167,17 +1165,16 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid, return err; } -static int setup_locking_range(struct opal_dev *dev) +static int setup_locking_range(struct opal_dev *dev, void *data) { u8 uid[OPAL_UID_LENGTH]; - struct opal_user_lr_setup *setup; + struct opal_user_lr_setup *setup = data; u8 lr; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - setup = dev->func_data[dev->state]; lr = setup->session.opal_key.lr; err = build_locking_range(uid, sizeof(uid), lr); if (err) @@ -1290,20 +1287,19 @@ static int start_generic_opal_session(struct opal_dev *dev, return finalize_and_send(dev, start_opal_session_cont); } -static int start_anybodyASP_opal_session(struct opal_dev *dev) +static int start_anybodyASP_opal_session(struct opal_dev *dev, void *data) { return start_generic_opal_session(dev, OPAL_ANYBODY_UID, OPAL_ADMINSP_UID, NULL, 0); } -static int start_SIDASP_opal_session(struct opal_dev *dev) +static int start_SIDASP_opal_session(struct opal_dev *dev, void *data) { int ret; const u8 *key = dev->prev_data; - struct opal_key *okey; if (!key) { - okey = dev->func_data[dev->state]; + const struct opal_key *okey = data; ret = start_generic_opal_session(dev, OPAL_SID_UID, OPAL_ADMINSP_UID, okey->key, @@ -1318,22 +1314,21 @@ static int start_SIDASP_opal_session(struct opal_dev *dev) return ret; } -static inline int start_admin1LSP_opal_session(struct opal_dev *dev) +static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data) { - struct opal_key *key = dev->func_data[dev->state]; - + struct opal_key *key = data; return start_generic_opal_session(dev, OPAL_ADMIN1_UID, OPAL_LOCKINGSP_UID, key->key, key->key_len); } -static int start_auth_opal_session(struct opal_dev *dev) +static int start_auth_opal_session(struct opal_dev *dev, void *data) { + struct opal_session_info *session = data; u8 lk_ul_user[OPAL_UID_LENGTH]; + size_t keylen = session->opal_key.key_len; int err = 0; - struct opal_session_info *session = dev->func_data[dev->state]; - size_t keylen = session->opal_key.key_len; u8 *key = session->opal_key.key; u32 hsn = GENERIC_HOST_SESSION_NUM; @@ -1383,7 +1378,7 @@ static int start_auth_opal_session(struct opal_dev *dev) return finalize_and_send(dev, start_opal_session_cont); } -static int revert_tper(struct opal_dev *dev) +static int revert_tper(struct opal_dev *dev, void *data) { int err = 0; @@ -1405,9 +1400,9 @@ static int revert_tper(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int internal_activate_user(struct opal_dev *dev) +static int internal_activate_user(struct opal_dev *dev, void *data) { - struct opal_session_info *session = dev->func_data[dev->state]; + struct opal_session_info *session = data; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1440,15 +1435,14 @@ static int internal_activate_user(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int erase_locking_range(struct opal_dev *dev) +static int erase_locking_range(struct opal_dev *dev, void *data) { - struct opal_session_info *session; + struct opal_session_info *session = data; u8 uid[OPAL_UID_LENGTH]; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - session = dev->func_data[dev->state]; if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) return -ERANGE; @@ -1467,9 +1461,9 @@ static int erase_locking_range(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_mbr_done(struct opal_dev *dev) +static int set_mbr_done(struct opal_dev *dev, void *data) { - u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state]; + u8 *mbr_done_tf = data; int err = 0; clear_opal_cmd(dev); @@ -1485,7 +1479,7 @@ static int set_mbr_done(struct opal_dev *dev) add_token_u8(&err, dev, OPAL_STARTLIST); add_token_u8(&err, dev, OPAL_STARTNAME); add_token_u8(&err, dev, 2); /* Done */ - add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */ + add_token_u8(&err, dev, *mbr_done_tf); /* Done T or F */ add_token_u8(&err, dev, OPAL_ENDNAME); add_token_u8(&err, dev, OPAL_ENDLIST); add_token_u8(&err, dev, OPAL_ENDNAME); @@ -1499,9 +1493,9 @@ static int set_mbr_done(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_mbr_enable_disable(struct opal_dev *dev) +static int set_mbr_enable_disable(struct opal_dev *dev, void *data) { - u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state]; + u8 *mbr_en_dis = data; int err = 0; clear_opal_cmd(dev); @@ -1517,7 +1511,7 @@ static int set_mbr_enable_disable(struct opal_dev *dev) add_token_u8(&err, dev, OPAL_STARTLIST); add_token_u8(&err, dev, OPAL_STARTNAME); add_token_u8(&err, dev, 1); - add_token_u8(&err, dev, mbr_en_dis); + add_token_u8(&err, dev, *mbr_en_dis); add_token_u8(&err, dev, OPAL_ENDNAME); add_token_u8(&err, dev, OPAL_ENDLIST); add_token_u8(&err, dev, OPAL_ENDNAME); @@ -1558,11 +1552,10 @@ static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, return err; } -static int set_new_pw(struct opal_dev *dev) +static int set_new_pw(struct opal_dev *dev, void *data) { u8 cpin_uid[OPAL_UID_LENGTH]; - struct opal_session_info *usr = dev->func_data[dev->state]; - + struct opal_session_info *usr = data; memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH); @@ -1583,10 +1576,10 @@ static int set_new_pw(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_sid_cpin_pin(struct opal_dev *dev) +static int set_sid_cpin_pin(struct opal_dev *dev, void *data) { u8 cpin_uid[OPAL_UID_LENGTH]; - struct opal_key *key = dev->func_data[dev->state]; + struct opal_key *key = data; memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH); @@ -1597,18 +1590,16 @@ static int set_sid_cpin_pin(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int add_user_to_lr(struct opal_dev *dev) +static int add_user_to_lr(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 user_uid[OPAL_UID_LENGTH]; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - lkul = dev->func_data[dev->state]; - memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], OPAL_UID_LENGTH); @@ -1675,11 +1666,11 @@ static int add_user_to_lr(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int lock_unlock_locking_range(struct opal_dev *dev) +static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; const u8 *method; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; @@ -1687,7 +1678,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev) set_comid(dev, dev->comid); method = opalmethod[OPAL_SET]; - lkul = dev->func_data[dev->state]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1739,19 +1729,18 @@ static int lock_unlock_locking_range(struct opal_dev *dev) } -static int lock_unlock_locking_range_sum(struct opal_dev *dev) +static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; const u8 *method; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); method = opalmethod[OPAL_SET]; - lkul = dev->func_data[dev->state]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1782,9 +1771,9 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int activate_lsp(struct opal_dev *dev) +static int activate_lsp(struct opal_dev *dev, void *data) { - struct opal_lr_act *opal_act; + struct opal_lr_act *opal_act = data; u8 user_lr[OPAL_UID_LENGTH]; u8 uint_3 = 0x83; int err = 0, i; @@ -1792,8 +1781,6 @@ static int activate_lsp(struct opal_dev *dev) clear_opal_cmd(dev); set_comid(dev, dev->comid); - opal_act = dev->func_data[dev->state]; - add_token_u8(&err, dev, OPAL_CALL); add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], OPAL_UID_LENGTH); @@ -1858,7 +1845,7 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev) } /* Determine if we're in the Manufactured Inactive or Active state */ -static int get_lsp_lifecycle(struct opal_dev *dev) +static int get_lsp_lifecycle(struct opal_dev *dev, void *data) { int err = 0; @@ -1919,14 +1906,13 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev) return 0; } -static int get_msid_cpin_pin(struct opal_dev *dev) +static int get_msid_cpin_pin(struct opal_dev *dev, void *data) { int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - add_token_u8(&err, dev, OPAL_CALL); add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID], OPAL_UID_LENGTH); @@ -1956,64 +1942,76 @@ static int get_msid_cpin_pin(struct opal_dev *dev) return finalize_and_send(dev, get_msid_cpin_pin_cont); } -static int build_end_opal_session(struct opal_dev *dev) +static int end_opal_session(struct opal_dev *dev, void *data) { int err = 0; clear_opal_cmd(dev); - set_comid(dev, dev->comid); add_token_u8(&err, dev, OPAL_ENDOFSESSION); - return err; -} -static int end_opal_session(struct opal_dev *dev) -{ - int ret = build_end_opal_session(dev); - - if (ret < 0) - return ret; + if (err < 0) + return err; return finalize_and_send(dev, end_session_cont); } static int end_opal_session_error(struct opal_dev *dev) { - const opal_step error_end_session[] = { - end_opal_session, - NULL, + const struct opal_step error_end_session[] = { + { end_opal_session, }, + { NULL, } }; - dev->funcs = error_end_session; - dev->state = 0; + dev->steps = error_end_session; return next(dev); } static inline void setup_opal_dev(struct opal_dev *dev, - const opal_step *funcs) + const struct opal_step *steps) { - dev->state = 0; - dev->funcs = funcs; + dev->steps = steps; dev->tsn = 0; dev->hsn = 0; - dev->func_data = NULL; dev->prev_data = NULL; } static int check_opal_support(struct opal_dev *dev) { - static const opal_step funcs[] = { - opal_discovery0, - NULL + const struct opal_step steps[] = { + { opal_discovery0, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, funcs); + setup_opal_dev(dev, steps); ret = next(dev); dev->supported = !ret; mutex_unlock(&dev->dev_lock); return ret; } +static void clean_opal_dev(struct opal_dev *dev) +{ + + struct opal_suspend_data *suspend, *next; + + mutex_lock(&dev->dev_lock); + list_for_each_entry_safe(suspend, next, &dev->unlk_lst, node) { + list_del(&suspend->node); + kfree(suspend); + } + mutex_unlock(&dev->dev_lock); +} + +void free_opal_dev(struct opal_dev *dev) +{ + if (!dev) + return; + clean_opal_dev(dev); + kfree(dev); +} +EXPORT_SYMBOL(free_opal_dev); + struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) { struct opal_dev *dev; @@ -2038,24 +2036,18 @@ EXPORT_SYMBOL(init_opal_dev); static int opal_secure_erase_locking_range(struct opal_dev *dev, struct opal_session_info *opal_session) { - void *data[3] = { NULL }; - static const opal_step erase_funcs[] = { - opal_discovery0, - start_auth_opal_session, - get_active_key, - gen_key, - end_opal_session, - NULL, + const struct opal_step erase_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, opal_session }, + { get_active_key, &opal_session->opal_key.lr }, + { gen_key, }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, erase_funcs); - - dev->func_data = data; - dev->func_data[1] = opal_session; - dev->func_data[2] = &opal_session->opal_key.lr; - + setup_opal_dev(dev, erase_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2064,23 +2056,17 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev, static int opal_erase_locking_range(struct opal_dev *dev, struct opal_session_info *opal_session) { - void *data[3] = { NULL }; - static const opal_step erase_funcs[] = { - opal_discovery0, - start_auth_opal_session, - erase_locking_range, - end_opal_session, - NULL, + const struct opal_step erase_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, opal_session }, + { erase_locking_range, opal_session }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, erase_funcs); - - dev->func_data = data; - dev->func_data[1] = opal_session; - dev->func_data[2] = opal_session; - + setup_opal_dev(dev, erase_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2089,16 +2075,15 @@ static int opal_erase_locking_range(struct opal_dev *dev, static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, struct opal_mbr_data *opal_mbr) { - void *func_data[6] = { NULL }; - static const opal_step mbr_funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - set_mbr_done, - end_opal_session, - start_admin1LSP_opal_session, - set_mbr_enable_disable, - end_opal_session, - NULL, + const struct opal_step mbr_steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &opal_mbr->key }, + { set_mbr_done, &opal_mbr->enable_disable }, + { end_opal_session, }, + { start_admin1LSP_opal_session, &opal_mbr->key }, + { set_mbr_enable_disable, &opal_mbr->enable_disable }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2107,12 +2092,7 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, mbr_funcs); - dev->func_data = func_data; - dev->func_data[1] = &opal_mbr->key; - dev->func_data[2] = &opal_mbr->enable_disable; - dev->func_data[4] = &opal_mbr->key; - dev->func_data[5] = &opal_mbr->enable_disable; + setup_opal_dev(dev, mbr_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2139,13 +2119,12 @@ static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) static int opal_add_user_to_lr(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) { - void *func_data[3] = { NULL }; - static const opal_step funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - add_user_to_lr, - end_opal_session, - NULL + const struct opal_step steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &lk_unlk->session.opal_key }, + { add_user_to_lr, lk_unlk }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2167,10 +2146,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, } mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, funcs); - dev->func_data = func_data; - dev->func_data[1] = &lk_unlk->session.opal_key; - dev->func_data[2] = lk_unlk; + setup_opal_dev(dev, steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2178,55 +2154,54 @@ static int opal_add_user_to_lr(struct opal_dev *dev, static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) { - void *data[2] = { NULL }; - static const opal_step revert_funcs[] = { - opal_discovery0, - start_SIDASP_opal_session, - revert_tper, /* controller will terminate session */ - NULL, + const struct opal_step revert_steps[] = { + { opal_discovery0, }, + { start_SIDASP_opal_session, opal }, + { revert_tper, }, /* controller will terminate session */ + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, revert_funcs); - dev->func_data = data; - dev->func_data[1] = opal; + setup_opal_dev(dev, revert_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); + + /* + * If we successfully reverted lets clean + * any saved locking ranges. + */ + if (!ret) + clean_opal_dev(dev); + return ret; } -static int __opal_lock_unlock_sum(struct opal_dev *dev) +static int __opal_lock_unlock(struct opal_dev *dev, + struct opal_lock_unlock *lk_unlk) { - static const opal_step ulk_funcs_sum[] = { - opal_discovery0, - start_auth_opal_session, - lock_unlock_locking_range_sum, - end_opal_session, - NULL + const struct opal_step unlock_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &lk_unlk->session }, + { lock_unlock_locking_range, lk_unlk }, + { end_opal_session, }, + { NULL, } + }; + const struct opal_step unlock_sum_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &lk_unlk->session }, + { lock_unlock_locking_range_sum, lk_unlk }, + { end_opal_session, }, + { NULL, } }; - dev->funcs = ulk_funcs_sum; + dev->steps = lk_unlk->session.sum ? unlock_sum_steps : unlock_steps; return next(dev); } -static int __opal_lock_unlock(struct opal_dev *dev) +static int opal_lock_unlock(struct opal_dev *dev, + struct opal_lock_unlock *lk_unlk) { - static const opal_step _unlock_funcs[] = { - opal_discovery0, - start_auth_opal_session, - lock_unlock_locking_range, - end_opal_session, - NULL - }; - - dev->funcs = _unlock_funcs; - return next(dev); -} - -static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) -{ - void *func_data[3] = { NULL }; int ret; if (lk_unlk->session.who < OPAL_ADMIN1 || @@ -2234,43 +2209,30 @@ static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_un return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, NULL); - dev->func_data = func_data; - dev->func_data[1] = &lk_unlk->session; - dev->func_data[2] = lk_unlk; - - if (lk_unlk->session.sum) - ret = __opal_lock_unlock_sum(dev); - else - ret = __opal_lock_unlock(dev); - + ret = __opal_lock_unlock(dev, lk_unlk); mutex_unlock(&dev->dev_lock); return ret; } static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) { - static const opal_step owner_funcs[] = { - opal_discovery0, - start_anybodyASP_opal_session, - get_msid_cpin_pin, - end_opal_session, - start_SIDASP_opal_session, - set_sid_cpin_pin, - end_opal_session, - NULL + const struct opal_step owner_steps[] = { + { opal_discovery0, }, + { start_anybodyASP_opal_session, }, + { get_msid_cpin_pin, }, + { end_opal_session, }, + { start_SIDASP_opal_session, opal }, + { set_sid_cpin_pin, opal }, + { end_opal_session, }, + { NULL, } }; - void *data[6] = { NULL }; int ret; if (!dev) return -ENODEV; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, owner_funcs); - dev->func_data = data; - dev->func_data[4] = opal; - dev->func_data[5] = opal; + setup_opal_dev(dev, owner_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2278,14 +2240,13 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act) { - void *data[4] = { NULL }; - static const opal_step active_funcs[] = { - opal_discovery0, - start_SIDASP_opal_session, /* Open session as SID auth */ - get_lsp_lifecycle, - activate_lsp, - end_opal_session, - NULL + const struct opal_step active_steps[] = { + { opal_discovery0, }, + { start_SIDASP_opal_session, &opal_lr_act->key }, + { get_lsp_lifecycle, }, + { activate_lsp, opal_lr_act }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2293,10 +2254,7 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, active_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_lr_act->key; - dev->func_data[3] = opal_lr_act; + setup_opal_dev(dev, active_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2305,21 +2263,17 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a static int opal_setup_locking_range(struct opal_dev *dev, struct opal_user_lr_setup *opal_lrs) { - void *data[3] = { NULL }; - static const opal_step lr_funcs[] = { - opal_discovery0, - start_auth_opal_session, - setup_locking_range, - end_opal_session, - NULL, + const struct opal_step lr_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &opal_lrs->session }, + { setup_locking_range, opal_lrs }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, lr_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_lrs->session; - dev->func_data[2] = opal_lrs; + setup_opal_dev(dev, lr_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2327,14 +2281,13 @@ static int opal_setup_locking_range(struct opal_dev *dev, static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) { - static const opal_step pw_funcs[] = { - opal_discovery0, - start_auth_opal_session, - set_new_pw, - end_opal_session, - NULL + const struct opal_step pw_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &opal_pw->session }, + { set_new_pw, &opal_pw->new_user_pw }, + { end_opal_session, }, + { NULL } }; - void *data[3] = { NULL }; int ret; if (opal_pw->session.who < OPAL_ADMIN1 || @@ -2344,11 +2297,7 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, pw_funcs); - dev->func_data = data; - dev->func_data[1] = (void *) &opal_pw->session; - dev->func_data[2] = (void *) &opal_pw->new_user_pw; - + setup_opal_dev(dev, pw_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2357,14 +2306,13 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) static int opal_activate_user(struct opal_dev *dev, struct opal_session_info *opal_session) { - static const opal_step act_funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - internal_activate_user, - end_opal_session, - NULL + const struct opal_step act_steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &opal_session->opal_key }, + { internal_activate_user, opal_session }, + { end_opal_session, }, + { NULL, } }; - void *data[3] = { NULL }; int ret; /* We can't activate Admin1 it's active as manufactured */ @@ -2375,10 +2323,7 @@ static int opal_activate_user(struct opal_dev *dev, } mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, act_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_session->opal_key; - dev->func_data[2] = opal_session; + setup_opal_dev(dev, act_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2387,7 +2332,6 @@ static int opal_activate_user(struct opal_dev *dev, bool opal_unlock_from_suspend(struct opal_dev *dev) { struct opal_suspend_data *suspend; - void *func_data[3] = { NULL }; bool was_failure = false; int ret = 0; @@ -2398,19 +2342,12 @@ bool opal_unlock_from_suspend(struct opal_dev *dev) mutex_lock(&dev->dev_lock); setup_opal_dev(dev, NULL); - dev->func_data = func_data; list_for_each_entry(suspend, &dev->unlk_lst, node) { - dev->state = 0; - dev->func_data[1] = &suspend->unlk.session; - dev->func_data[2] = &suspend->unlk; dev->tsn = 0; dev->hsn = 0; - if (suspend->unlk.session.sum) - ret = __opal_lock_unlock_sum(dev); - else - ret = __opal_lock_unlock(dev); + ret = __opal_lock_unlock(dev, &suspend->unlk); if (ret) { pr_warn("Failed to unlock LR %hhu with sum %d\n", suspend->unlk.session.opal_key.lr, @@ -2437,7 +2374,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) return -ENOTSUPP; } - p = memdup_user(arg, _IOC_SIZE(cmd)); + p = memdup_user(arg, _IOC_SIZE(cmd)); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index a18de9d727b0..01a1f7e24978 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -17,15 +17,15 @@ * 02111-1307, USA. * * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * + * * Author: Stephen M. Cameron */ #ifdef CONFIG_CISS_SCSI_TAPE -/* Here we have code to present the driver as a scsi driver - as it is simultaneously presented as a block driver. The +/* Here we have code to present the driver as a scsi driver + as it is simultaneously presented as a block driver. The reason for doing this is to allow access to SCSI tape drives - through the array controller. Note in particular, neither + through the array controller. Note in particular, neither physical nor logical disks are presented through the scsi layer. */ #include @@ -37,7 +37,7 @@ #include #include -#include +#include #include "cciss_scsi.h" @@ -120,7 +120,7 @@ struct cciss_scsi_adapter_data_t { struct cciss_scsi_cmd_stack_t cmd_stack; SGDescriptor_struct **cmd_sg_list; int registered; - spinlock_t lock; // to protect ccissscsi[ctlr]; + spinlock_t lock; // to protect ccissscsi[ctlr]; }; #define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \ @@ -143,36 +143,36 @@ scsi_cmd_alloc(ctlr_info_t *h) u64bit temp64; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; - if (stk->top < 0) + if (stk->top < 0) return NULL; - c = stk->elem[stk->top]; + c = stk->elem[stk->top]; /* memset(c, 0, sizeof(*c)); */ memset(&c->cmd, 0, sizeof(c->cmd)); memset(&c->Err, 0, sizeof(c->Err)); /* set physical addr of cmd and addr of scsi parameters */ - c->cmd.busaddr = c->busaddr; + c->cmd.busaddr = c->busaddr; c->cmd.cmdindex = c->cmdindex; - /* (__u32) (stk->cmd_pool_handle + + /* (__u32) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */ temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct)); - /* (__u64) (stk->cmd_pool_handle + + /* (__u64) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) + sizeof(CommandList_struct)); */ stk->top--; c->cmd.ErrDesc.Addr.lower = temp64.val32.lower; c->cmd.ErrDesc.Addr.upper = temp64.val32.upper; c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct); - + c->cmd.ctlr = h->ctlr; c->cmd.err_info = &c->Err; return (CommandList_struct *) c; } -static void +static void scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) { /* assume only one process in here at a time, locking done by caller. */ @@ -183,7 +183,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) struct cciss_scsi_cmd_stack_t *stk; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; stk->top++; if (stk->top >= stk->nelems) { dev_err(&h->pdev->dev, @@ -228,7 +228,7 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) } for (i = 0; i < stk->nelems; i++) { stk->elem[i] = &stk->pool[i]; - stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + + stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i)); stk->elem[i]->cmdindex = i; } @@ -244,7 +244,7 @@ scsi_cmd_stack_free(ctlr_info_t *h) size_t size; sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; if (stk->top != stk->nelems-1) { dev_warn(&h->pdev->dev, "bug: %d scsi commands are still outstanding.\n", @@ -266,7 +266,7 @@ print_cmd(CommandList_struct *cp) printk("queue:%d\n", cp->Header.ReplyQueue); printk("sglist:%d\n", cp->Header.SGList); printk("sgtot:%d\n", cp->Header.SGTotal); - printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper, + printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper, cp->Header.Tag.lower); printk("LUN:0x%8phN\n", cp->Header.LUN.LunAddrBytes); printk("CDBLen:%d\n", cp->Request.CDBLen); @@ -275,8 +275,8 @@ print_cmd(CommandList_struct *cp) printk(" Dir:%d\n",cp->Request.Type.Direction); printk("Timeout:%d\n",cp->Request.Timeout); printk("CDB: %16ph\n", cp->Request.CDB); - printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n", - cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower, + printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n", + cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower, cp->ErrDesc.Len); printk("sgs..........Errorinfo:\n"); printk("scsistatus:%d\n", cp->err_info->ScsiStatus); @@ -289,7 +289,7 @@ print_cmd(CommandList_struct *cp) } #endif -static int +static int find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun) { /* finds an unused bus, target, lun for a new device */ @@ -299,24 +299,24 @@ find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun) memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA); - target_taken[SELF_SCSI_ID] = 1; + target_taken[SELF_SCSI_ID] = 1; for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) target_taken[ccissscsi[h->ctlr].dev[i].target] = 1; - + for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) { if (!target_taken[i]) { *bus = 0; *target=i; *lun = 0; found=1; break; } } - return (!found); + return (!found); } struct scsi2map { char scsi3addr[8]; int bus, target, lun; }; -static int +static int cciss_scsi_add_entry(ctlr_info_t *h, int hostno, struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) @@ -381,8 +381,8 @@ cciss_scsi_add_entry(ctlr_info_t *h, int hostno, ccissscsi[h->ctlr].ndevices++; - /* initially, (before registering with scsi layer) we don't - know our hostno and we don't want to print anything first + /* initially, (before registering with scsi layer) we don't + know our hostno and we don't want to print anything first time anyway (the scsi layer's inquiries will show that info) */ if (hostno != -1) dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n", @@ -467,7 +467,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, /* sd contains scsi3 addresses and devtypes, but bus target and lun are not filled in. This funciton takes what's in sd to be the current and adjusts - ccissscsi[] to be in line with what's in sd. */ + ccissscsi[] to be in line with what's in sd. */ int i,j, found, changes=0; struct cciss_scsi_dev_t *csd; @@ -492,7 +492,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, if (hostno != -1) /* if it's not the first time... */ sh = h->scsi_ctlr->scsi_host; - /* find any devices in ccissscsi[] that are not in + /* find any devices in ccissscsi[] that are not in sd[] and remove them from ccissscsi[] */ i = 0; @@ -512,7 +512,7 @@ adjust_cciss_scsi_table(ctlr_info_t *h, int hostno, } } - if (found == 0) { /* device no longer present. */ + if (found == 0) { /* device no longer present. */ changes++; cciss_scsi_remove_entry(h, hostno, i, removed, &nremoved); @@ -641,14 +641,13 @@ lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr) return -1; } -static void +static void cciss_scsi_setup(ctlr_info_t *h) { struct cciss_scsi_adapter_data_t * shba; ccissscsi[h->ctlr].ndevices = 0; - shba = (struct cciss_scsi_adapter_data_t *) - kmalloc(sizeof(*shba), GFP_KERNEL); + shba = kmalloc(sizeof(*shba), GFP_KERNEL); if (shba == NULL) return; shba->scsi_host = NULL; @@ -693,20 +692,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, /* copy the sense data whether we need to or not. */ - memcpy(cmd->sense_buffer, ei->SenseInfo, + memcpy(cmd->sense_buffer, ei->SenseInfo, ei->SenseLen > SCSI_SENSE_BUFFERSIZE ? - SCSI_SENSE_BUFFERSIZE : + SCSI_SENSE_BUFFERSIZE : ei->SenseLen); scsi_set_resid(cmd, ei->ResidualCnt); - if(ei->CommandStatus != 0) - { /* an error has occurred */ - switch(ei->CommandStatus) - { + if (ei->CommandStatus != 0) { /* an error has occurred */ + switch (ei->CommandStatus) { case CMD_TARGET_STATUS: /* Pass it up to the upper layers... */ if (!ei->ScsiStatus) { - + /* Ordinarily, this case should never happen, but there is a bug in some released firmware revisions that allows it to happen if, for example, a 4100 backplane loses power and the tape @@ -731,7 +728,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, print_cmd(c); */ /* We get CMD_INVALID if you address a non-existent tape drive instead - of a selection timeout (no response). You will see this if you yank + of a selection timeout (no response). You will see this if you yank out a tape drive, then try to access it. This is kind of a shame because it means that any other CMD_INVALID (e.g. driver bug) will get interpreted as a missing target. */ @@ -780,7 +777,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "%p returned unknown status %x\n", c, - ei->CommandStatus); + ei->CommandStatus); } } cmd->scsi_done(cmd); @@ -796,15 +793,15 @@ cciss_scsi_detect(ctlr_info_t *h) sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *)); if (sh == NULL) goto fail; - sh->io_port = 0; // good enough? FIXME, + sh->io_port = 0; // good enough? FIXME, sh->n_io_port = 0; // I don't think we use these two... - sh->this_id = SELF_SCSI_ID; + sh->this_id = SELF_SCSI_ID; sh->can_queue = cciss_tape_cmds; sh->sg_tablesize = h->maxsgentries; sh->max_cmd_len = MAX_COMMAND_SIZE; sh->max_sectors = h->cciss_max_sectors; - ((struct cciss_scsi_adapter_data_t *) + ((struct cciss_scsi_adapter_data_t *) h->scsi_ctlr)->scsi_host = sh; sh->hostdata[0] = (unsigned long) h; sh->irq = h->intr[SIMPLE_MODE_INT]; @@ -856,7 +853,7 @@ cciss_map_one(struct pci_dev *pdev, static int cciss_scsi_do_simple_cmd(ctlr_info_t *h, CommandList_struct *c, - unsigned char *scsi3addr, + unsigned char *scsi3addr, unsigned char *cdb, unsigned char cdblen, unsigned char *buf, int bufsize, @@ -871,7 +868,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */ // Fill in the request block... - /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", + /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3], scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */ @@ -885,7 +882,7 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, /* Fill in the SG list and do dma mapping */ cciss_map_one(h->pdev, c, (unsigned char *) buf, - bufsize, DMA_FROM_DEVICE); + bufsize, DMA_FROM_DEVICE); c->waiting = &wait; enqueue_cmd_and_start_io(h, c); @@ -896,14 +893,13 @@ cciss_scsi_do_simple_cmd(ctlr_info_t *h, return(0); } -static void +static void cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c) { ErrorInfo_struct *ei; ei = c->err_info; - switch(ei->CommandStatus) - { + switch (ei->CommandStatus) { case CMD_TARGET_STATUS: dev_warn(&h->pdev->dev, "cmd %p has completed with errors\n", c); @@ -1005,7 +1001,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr, if (rc != 0) return rc; /* something went wrong */ - if (ei->CommandStatus != 0 && + if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { cciss_scsi_interpret_error(h, c); rc = -1; @@ -1013,7 +1009,7 @@ cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr, spin_lock_irqsave(&h->lock, flags); scsi_cmd_free(h, c); spin_unlock_irqrestore(&h->lock, flags); - return rc; + return rc; } /* Get the device id from inquiry page 0x83 */ @@ -1042,7 +1038,7 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, int rc; CommandList_struct *c; unsigned char cdb[12]; - unsigned char scsi3addr[8]; + unsigned char scsi3addr[8]; ErrorInfo_struct *ei; unsigned long flags; @@ -1069,14 +1065,14 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, cdb[11] = 0; rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, - cdb, 12, - (unsigned char *) buf, + cdb, 12, + (unsigned char *) buf, bufsize, XFER_READ); if (rc != 0) return rc; /* something went wrong */ ei = c->err_info; - if (ei->CommandStatus != 0 && + if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { cciss_scsi_interpret_error(h, c); rc = -1; @@ -1084,36 +1080,36 @@ cciss_scsi_do_report_phys_luns(ctlr_info_t *h, spin_lock_irqsave(&h->lock, flags); scsi_cmd_free(h, c); spin_unlock_irqrestore(&h->lock, flags); - return rc; + return rc; } static void cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) { /* the idea here is we could get notified from /proc - that some devices have changed, so we do a report - physical luns cmd, and adjust our list of devices + that some devices have changed, so we do a report + physical luns cmd, and adjust our list of devices accordingly. (We can't rely on the scsi-mid layer just - doing inquiries, because the "busses" that the scsi + doing inquiries, because the "busses" that the scsi mid-layer probes are totally fabricated by this driver, so new devices wouldn't show up. - the scsi3addr's of devices won't change so long as the - adapter is not reset. That means we can rescan and - tell which devices we already know about, vs. new + the scsi3addr's of devices won't change so long as the + adapter is not reset. That means we can rescan and + tell which devices we already know about, vs. new devices, vs. disappearing devices. Also, if you yank out a tape drive, then put in a disk - in it's place, (say, a configured volume from another - array controller for instance) _don't_ poke this driver - (so it thinks it's still a tape, but _do_ poke the scsi - mid layer, so it does an inquiry... the scsi mid layer + in it's place, (say, a configured volume from another + array controller for instance) _don't_ poke this driver + (so it thinks it's still a tape, but _do_ poke the scsi + mid layer, so it does an inquiry... the scsi mid layer will see the physical disk. This would be bad. Need to - think about how to prevent that. One idea would be to + think about how to prevent that. One idea would be to snoop all scsi responses and if an inquiry repsonse comes back that reports a disk, chuck it an return selection timeout instead and adjust our table... Not sure i like - that though. + that though. */ #define OBDR_TAPE_INQ_SIZE 49 @@ -1141,9 +1137,9 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; if (num_luns > CISS_MAX_PHYS_LUN) { - printk(KERN_WARNING + printk(KERN_WARNING "cciss: Maximum physical LUNs (%d) exceeded. " - "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN, + "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN, num_luns - CISS_MAX_PHYS_LUN); num_luns = CISS_MAX_PHYS_LUN; } @@ -1154,7 +1150,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) } - /* adjust our table of devices */ + /* adjust our table of devices */ for (i = 0; i < num_luns; i++) { /* for each physical lun, do an inquiry */ if (ld_buff->LUN[i][3] & 0xC0) continue; @@ -1182,8 +1178,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) cciss_scsi_get_device_id(h, scsi3addr, this_device->device_id, sizeof(this_device->device_id)); - switch (this_device->devtype) - { + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { /* We don't *really* support actual CD-ROM devices, @@ -1213,7 +1208,7 @@ cciss_update_non_disk_devices(ctlr_info_t *h, int hostno) currentsd[ncurrent] = *this_device; ncurrent++; break; - default: + default: break; } } @@ -1258,8 +1253,8 @@ cciss_scsi_write_info(struct Scsi_Host *sh, return -EINVAL; return cciss_scsi_user_command(h, sh->host_no, - buffer, length); -} + buffer, length); +} static int cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh) @@ -1297,8 +1292,8 @@ cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh) return 0; } -/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci - dma mapping and fills in the scatter gather entries of the +/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci + dma mapping and fills in the scatter gather entries of the cciss command, c. */ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c, @@ -1394,7 +1389,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn // Fill in the command list header - cmd->scsi_done = done; // save this for use by completion code + cmd->scsi_done = done; // save this for use by completion code /* save c in case we have to abort it */ cmd->host_scribble = (unsigned char *) c; @@ -1404,7 +1399,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn c->Header.ReplyQueue = 0; /* unused in simple mode */ memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8); c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */ - + // Fill in the request block... c->Request.Timeout = 0; @@ -1414,8 +1409,7 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len); c->Request.Type.Type = TYPE_CMD; c->Request.Type.Attribute = ATTR_SIMPLE; - switch(cmd->sc_data_direction) - { + switch (cmd->sc_data_direction) { case DMA_TO_DEVICE: c->Request.Type.Direction = XFER_WRITE; break; @@ -1432,15 +1426,15 @@ cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn c->Request.Type.Direction = XFER_RSVD; // This is technically wrong, and cciss controllers should - // reject it with CMD_INVALID, which is the most correct - // response, but non-fibre backends appear to let it + // reject it with CMD_INVALID, which is the most correct + // response, but non-fibre backends appear to let it // slide by, and give the same results as if this field // were set correctly. Either way is acceptable for // our purposes here. break; - default: + default: dev_warn(&h->pdev->dev, "unknown data direction: %d\n", cmd->sc_data_direction); BUG(); @@ -1464,9 +1458,9 @@ static void cciss_unregister_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; - /* if we weren't ever actually registered, don't unregister */ + /* if we weren't ever actually registered, don't unregister */ if (sa->registered) { spin_unlock_irqrestore(&h->lock, flags); scsi_remove_host(sa->scsi_host); @@ -1474,7 +1468,7 @@ static void cciss_unregister_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); } - /* set scsi_host to NULL so our detect routine will + /* set scsi_host to NULL so our detect routine will find us on register */ sa->scsi_host = NULL; spin_unlock_irqrestore(&h->lock, flags); @@ -1490,7 +1484,7 @@ static int cciss_engage_scsi(ctlr_info_t *h) spin_lock_irqsave(&h->lock, flags); sa = h->scsi_ctlr; - stk = &sa->cmd_stack; + stk = &sa->cmd_stack; if (sa->registered) { dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n"); @@ -1586,13 +1580,13 @@ retry_tur: return rc; } -/* Need at least one of these error handlers to keep ../scsi/hosts.c from - * complaining. Doing a host- or bus-reset can't do anything good here. +/* Need at least one of these error handlers to keep ../scsi/hosts.c from + * complaining. Doing a host- or bus-reset can't do anything good here. * Despite what it might say in scsi_error.c, there may well be commands * on the controller, as the cciss driver registers twice, once as a block * device for the logical drives, and once as a scsi device, for any tape * drives. So we know there are no commands out on the tape drives, but we - * don't know there are no commands on the controller, and it is likely + * don't know there are no commands on the controller, and it is likely * that there probably are, as the cciss block device is most commonly used * as a boot device (embedded controller on HP/Compaq systems.) */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0be84a3cb6d7..0bf2b21a62cb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -96,6 +96,10 @@ static int max_part; static struct workqueue_struct *recv_workqueue; static int part_shift; +static int nbd_dev_dbg_init(struct nbd_device *nbd); +static void nbd_dev_dbg_close(struct nbd_device *nbd); + + static inline struct device *nbd_to_dev(struct nbd_device *nbd) { return disk_to_dev(nbd->disk); @@ -120,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) { - bdev->bd_inode->i_size = 0; + bd_set_size(bdev, 0); set_capacity(nbd->disk, 0); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -129,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) { - if (!nbd_is_connected(nbd)) - return; - - bdev->bd_inode->i_size = nbd->bytesize; + blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize); + blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize); + bd_set_size(bdev, nbd->bytesize); set_capacity(nbd->disk, nbd->bytesize >> 9); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } -static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, +static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, loff_t blocksize, loff_t nr_blocks) { - int ret; - - ret = set_blocksize(bdev, blocksize); - if (ret) - return ret; - nbd->blksize = blocksize; nbd->bytesize = blocksize * nr_blocks; - - nbd_size_update(nbd, bdev); - - return 0; + if (nbd_is_connected(nbd)) + nbd_size_update(nbd, bdev); } static void nbd_end_request(struct nbd_cmd *cmd) @@ -571,10 +566,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; } -static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) +static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, + unsigned long arg) { + struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; if (!nbd->task_setup) nbd->task_setup = current; @@ -598,26 +600,20 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) nsock->sock = sock; socks[nbd->num_connections++] = nsock; + if (max_part) + bdev->bd_invalidated = 1; return 0; } /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; nbd->tag_set.timeout = 0; - nbd->num_connections = 0; - nbd->task_setup = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -659,81 +655,143 @@ static void send_disconnects(struct nbd_device *nbd) } } -static int nbd_dev_dbg_init(struct nbd_device *nbd); -static void nbd_dev_dbg_close(struct nbd_device *nbd); +static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev) +{ + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); + if (!nbd->socks) + return -EINVAL; + + mutex_unlock(&nbd->config_lock); + fsync_bdev(bdev); + mutex_lock(&nbd->config_lock); + + /* Check again after getting mutex back. */ + if (!nbd->socks) + return -EINVAL; + + if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, + &nbd->runtime_flags)) + send_disconnects(nbd); + return 0; +} + +static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) +{ + sock_shutdown(nbd); + nbd_clear_que(nbd); + kill_bdev(bdev); + nbd_bdev_reset(bdev); + /* + * We want to give the run thread a chance to wait for everybody + * to clean up and then do it's own cleanup. + */ + if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) && + nbd->num_connections) { + int i; + + for (i = 0; i < nbd->num_connections; i++) + kfree(nbd->socks[i]); + kfree(nbd->socks); + nbd->socks = NULL; + nbd->num_connections = 0; + } + nbd->task_setup = NULL; + + return 0; +} + +static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +{ + struct recv_thread_args *args; + int num_connections = nbd->num_connections; + int error = 0, i; + + if (nbd->task_recv) + return -EBUSY; + if (!nbd->socks) + return -EINVAL; + if (num_connections > 1 && + !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); + error = -EINVAL; + goto out_err; + } + + set_bit(NBD_RUNNING, &nbd->runtime_flags); + blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); + args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); + if (!args) { + error = -ENOMEM; + goto out_err; + } + nbd->task_recv = current; + mutex_unlock(&nbd->config_lock); + + nbd_parse_flags(nbd, bdev); + + error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (error) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + goto out_recv; + } + + nbd_size_update(nbd, bdev); + + nbd_dev_dbg_init(nbd); + for (i = 0; i < num_connections; i++) { + sk_set_memalloc(nbd->socks[i]->sock->sk); + atomic_inc(&nbd->recv_threads); + INIT_WORK(&args[i].work, recv_work); + args[i].nbd = nbd; + args[i].index = i; + queue_work(recv_workqueue, &args[i].work); + } + wait_event_interruptible(nbd->recv_wq, + atomic_read(&nbd->recv_threads) == 0); + for (i = 0; i < num_connections; i++) + flush_work(&args[i].work); + nbd_dev_dbg_close(nbd); + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +out_recv: + mutex_lock(&nbd->config_lock); + nbd->task_recv = NULL; +out_err: + clear_bit(NBD_RUNNING, &nbd->runtime_flags); + nbd_clear_sock(nbd, bdev); + + /* user requested, ignore socket errors */ + if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + error = 0; + if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) + error = -ETIMEDOUT; + + nbd_reset(nbd); + return error; +} /* Must be called with config_lock held */ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { switch (cmd) { - case NBD_DISCONNECT: { - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->socks) - return -EINVAL; - - mutex_unlock(&nbd->config_lock); - fsync_bdev(bdev); - mutex_lock(&nbd->config_lock); - - /* Check again after getting mutex back. */ - if (!nbd->socks) - return -EINVAL; - - if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, - &nbd->runtime_flags)) - send_disconnects(nbd); - return 0; - } - + case NBD_DISCONNECT: + return nbd_disconnect(nbd, bdev); case NBD_CLEAR_SOCK: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - /* - * We want to give the run thread a chance to wait for everybody - * to clean up and then do it's own cleanup. - */ - if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; - nbd->num_connections = 0; - nbd->task_setup = NULL; - } + return nbd_clear_sock(nbd, bdev); + case NBD_SET_SOCK: + return nbd_add_socket(nbd, bdev, arg); + case NBD_SET_BLKSIZE: + nbd_size_set(nbd, bdev, arg, + div_s64(nbd->bytesize, arg)); return 0; - - case NBD_SET_SOCK: { - int err; - struct socket *sock = sockfd_lookup(arg, &err); - - if (!sock) - return err; - - err = nbd_add_socket(nbd, sock); - if (!err && max_part) - bdev->bd_invalidated = 1; - - return err; - } - - case NBD_SET_BLKSIZE: { - loff_t bsize = div_s64(nbd->bytesize, arg); - - return nbd_size_set(nbd, bdev, arg, bsize); - } - case NBD_SET_SIZE: - return nbd_size_set(nbd, bdev, nbd->blksize, - div_s64(arg, nbd->blksize)); - + nbd_size_set(nbd, bdev, nbd->blksize, + div_s64(arg, nbd->blksize)); + return 0; case NBD_SET_SIZE_BLOCKS: - return nbd_size_set(nbd, bdev, nbd->blksize, arg); - + nbd_size_set(nbd, bdev, nbd->blksize, arg); + return 0; case NBD_SET_TIMEOUT: nbd->tag_set.timeout = arg * HZ; return 0; @@ -741,85 +799,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_FLAGS: nbd->flags = arg; return 0; - - case NBD_DO_IT: { - struct recv_thread_args *args; - int num_connections = nbd->num_connections; - int error = 0, i; - - if (nbd->task_recv) - return -EBUSY; - if (!nbd->socks) - return -EINVAL; - if (num_connections > 1 && - !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { - dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); - error = -EINVAL; - goto out_err; - } - - set_bit(NBD_RUNNING, &nbd->runtime_flags); - blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); - args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); - if (!args) { - error = -ENOMEM; - goto out_err; - } - nbd->task_recv = current; - mutex_unlock(&nbd->config_lock); - - nbd_parse_flags(nbd, bdev); - - error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); - if (error) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - goto out_recv; - } - - nbd_size_update(nbd, bdev); - - nbd_dev_dbg_init(nbd); - for (i = 0; i < num_connections; i++) { - sk_set_memalloc(nbd->socks[i]->sock->sk); - atomic_inc(&nbd->recv_threads); - INIT_WORK(&args[i].work, recv_work); - args[i].nbd = nbd; - args[i].index = i; - queue_work(recv_workqueue, &args[i].work); - } - wait_event_interruptible(nbd->recv_wq, - atomic_read(&nbd->recv_threads) == 0); - for (i = 0; i < num_connections; i++) - flush_work(&args[i].work); - nbd_dev_dbg_close(nbd); - nbd_size_clear(nbd, bdev); - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); -out_recv: - mutex_lock(&nbd->config_lock); - nbd->task_recv = NULL; -out_err: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - - /* user requested, ignore socket errors */ - if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - error = 0; - if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) - error = -ETIMEDOUT; - - nbd_reset(nbd); - return error; - } - + case NBD_DO_IT: + return nbd_start_device(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. */ return 0; - case NBD_PRINT_DEBUG: /* * For compatibility only, we no longer keep a list of @@ -1134,8 +1121,10 @@ static int __init nbd_init(void) if (!recv_workqueue) return -ENOMEM; - if (register_blkdev(NBD_MAJOR, "nbd")) + if (register_blkdev(NBD_MAJOR, "nbd")) { + destroy_workqueue(recv_workqueue); return -EIO; + } nbd_dbg_init(); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 67d76f21fecd..28955b94d2b2 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -328,13 +328,15 @@ static void dm_softirq_done(struct request *rq) int rw; if (!clone) { - rq_end_stats(tio->md, rq); + struct mapped_device *md = tio->md; + + rq_end_stats(md, rq); rw = rq_data_dir(rq); if (!rq->q->mq_ops) blk_end_request_all(rq, tio->error); else blk_mq_end_request(rq, tio->error); - rq_completed(tio->md, rw, false); + rq_completed(md, rw, false); return; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 44a1a257e0b5..25ec4e585220 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); +static unsigned long default_ps_max_latency_us = 25000; +module_param(default_ps_max_latency_us, ulong, 0644); +MODULE_PARM_DESC(default_ps_max_latency_us, + "max power saving latency for new devices; use PM QOS to change per device"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); @@ -560,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL); + c.identify.cns = NVME_ID_CNS_CTRL; *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) @@ -578,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n struct nvme_command c = { }; c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST); + c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; c.identify.nsid = cpu_to_le32(nsid); return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } @@ -590,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, int error; /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ - c.identify.opcode = nvme_admin_identify, - c.identify.nsid = cpu_to_le32(nsid), + c.identify.opcode = nvme_admin_identify; + c.identify.nsid = cpu_to_le32(nsid); + c.identify.cns = NVME_ID_CNS_NS; *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); if (!*id) @@ -1251,6 +1258,176 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } +static void nvme_configure_apst(struct nvme_ctrl *ctrl) +{ + /* + * APST (Autonomous Power State Transition) lets us program a + * table of power state transitions that the controller will + * perform automatically. We configure it with a simple + * heuristic: we are willing to spend at most 2% of the time + * transitioning between power states. Therefore, when running + * in any given state, we will enter the next lower-power + * non-operational state after waiting 100 * (enlat + exlat) + * microseconds, as long as that state's total latency is under + * the requested maximum latency. + * + * We will not autonomously enter any non-operational state for + * which the total latency exceeds ps_max_latency_us. Users + * can set ps_max_latency_us to zero to turn off APST. + */ + + unsigned apste; + struct nvme_feat_auto_pst *table; + int ret; + + /* + * If APST isn't supported or if we haven't been initialized yet, + * then don't do anything. + */ + if (!ctrl->apsta) + return; + + if (ctrl->npss > 31) { + dev_warn(ctrl->device, "NPSS is invalid; not using APST\n"); + return; + } + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return; + + if (ctrl->ps_max_latency_us == 0) { + /* Turn off APST. */ + apste = 0; + } else { + __le64 target = cpu_to_le64(0); + int state; + + /* + * Walk through all states from lowest- to highest-power. + * According to the spec, lower-numbered states use more + * power. NPSS, despite the name, is the index of the + * lowest-power state, not the number of states. + */ + for (state = (int)ctrl->npss; state >= 0; state--) { + u64 total_latency_us, transition_ms; + + if (target) + table->entries[state] = target; + + /* + * Is this state a useful non-operational state for + * higher-power states to autonomously transition to? + */ + if (!(ctrl->psd[state].flags & + NVME_PS_FLAGS_NON_OP_STATE)) + continue; + + total_latency_us = + (u64)le32_to_cpu(ctrl->psd[state].entry_lat) + + + le32_to_cpu(ctrl->psd[state].exit_lat); + if (total_latency_us > ctrl->ps_max_latency_us) + continue; + + /* + * This state is good. Use it as the APST idle + * target for higher power states. + */ + transition_ms = total_latency_us + 19; + do_div(transition_ms, 20); + if (transition_ms > (1 << 24) - 1) + transition_ms = (1 << 24) - 1; + + target = cpu_to_le64((state << 3) | + (transition_ms << 8)); + } + + apste = 1; + } + + ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, + table, sizeof(*table), NULL); + if (ret) + dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret); + + kfree(table); +} + +static void nvme_set_latency_tolerance(struct device *dev, s32 val) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + u64 latency; + + switch (val) { + case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT: + case PM_QOS_LATENCY_ANY: + latency = U64_MAX; + break; + + default: + latency = val; + } + + if (ctrl->ps_max_latency_us != latency) { + ctrl->ps_max_latency_us = latency; + nvme_configure_apst(ctrl); + } +} + +struct nvme_core_quirk_entry { + /* + * NVMe model and firmware strings are padded with spaces. For + * simplicity, strings in the quirk table are padded with NULLs + * instead. + */ + u16 vid; + const char *mn; + const char *fr; + unsigned long quirks; +}; + +static const struct nvme_core_quirk_entry core_quirks[] = { + /* + * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes + * the controller to go out to lunch. It dies when the watchdog + * timer reads CSTS and gets 0xffffffff. + */ + { + .vid = 0x144d, + .fr = "BXW75D0Q", + .quirks = NVME_QUIRK_NO_APST, + }, +}; + +/* match is null-terminated but idstr is space-padded. */ +static bool string_matches(const char *idstr, const char *match, size_t len) +{ + size_t matchlen; + + if (!match) + return true; + + matchlen = strlen(match); + WARN_ON_ONCE(matchlen > len); + + if (memcmp(idstr, match, matchlen)) + return false; + + for (; matchlen < len; matchlen++) + if (idstr[matchlen] != ' ') + return false; + + return true; +} + +static bool quirk_matches(const struct nvme_id_ctrl *id, + const struct nvme_core_quirk_entry *q) +{ + return q->vid == le16_to_cpu(id->vid) && + string_matches(id->mn, q->mn, sizeof(id->mn)) && + string_matches(id->fr, q->fr, sizeof(id->fr)); +} + /* * Initialize the cached copies of the Identify data and various controller * register in our nvme_ctrl structure. This should be called as soon as @@ -1262,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) u64 cap; int ret, page_shift; u32 max_hw_sectors; + u8 prev_apsta; ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); if (ret) { @@ -1285,6 +1463,24 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return -EIO; } + if (!ctrl->identified) { + /* + * Check for quirks. Quirk can depend on firmware version, + * so, in principle, the set of quirks present can change + * across a reset. As a possible future enhancement, we + * could re-scan for quirks every time we reinitialize + * the device, but we'd have to make sure that the driver + * behaves intelligently if the quirks change. + */ + + int i; + + for (i = 0; i < ARRAY_SIZE(core_quirks); i++) { + if (quirk_matches(id, &core_quirks[i])) + ctrl->quirks |= core_quirks[i].quirks; + } + } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); @@ -1305,6 +1501,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); + ctrl->npss = id->npss; + prev_apsta = ctrl->apsta; + ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); + if (ctrl->ops->is_fabrics) { ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->ioccsz = le32_to_cpu(id->ioccsz); @@ -1328,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } kfree(id); + + if (ctrl->apsta && !prev_apsta) + dev_pm_qos_expose_latency_tolerance(ctrl->device); + else if (!ctrl->apsta && prev_apsta) + dev_pm_qos_hide_latency_tolerance(ctrl->device); + + nvme_configure_apst(ctrl); + + ctrl->identified = true; + return ret; } EXPORT_SYMBOL_GPL(nvme_init_identify); @@ -1577,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev, } static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); +static ssize_t nvme_sysfs_show_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + static const char *const state_name[] = { + [NVME_CTRL_NEW] = "new", + [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_RESETTING] = "resetting", + [NVME_CTRL_RECONNECTING]= "reconnecting", + [NVME_CTRL_DELETING] = "deleting", + [NVME_CTRL_DEAD] = "dead", + }; + + if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && + state_name[ctrl->state]) + return sprintf(buf, "%s\n", state_name[ctrl->state]); + + return sprintf(buf, "unknown state\n"); +} + +static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); + static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, struct device_attribute *attr, char *buf) @@ -1609,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_transport.attr, &dev_attr_subsysnqn.attr, &dev_attr_address.attr, + &dev_attr_state.attr, NULL }; @@ -2065,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, list_add_tail(&ctrl->node, &nvme_ctrl_list); spin_unlock(&dev_list_lock); + /* + * Initialize latency tolerance controls. The sysfs files won't + * be visible to userspace unless the device actually supports APST. + */ + ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance; + dev_pm_qos_update_user_latency_tolerance(ctrl->device, + min(default_ps_max_latency_us, (unsigned long)S32_MAX)); + return 0; out_release_instance: nvme_release_instance(ctrl); @@ -2090,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * Revalidating a dead namespace sets capacity to 0. This will * end buffered writers dirtying pages that can't be synced. */ - if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - revalidate_disk(ns->disk); - + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + continue; + revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); blk_mq_abort_requeue_list(ns->queue); blk_mq_start_stopped_hw_queues(ns->queue, true); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 916d13608059..5b7386f69f4d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); * being implemented to the common NVMe fabrics library. Part of * the overall init sequence of starting up a fabrics driver. */ -void nvmf_register_transport(struct nvmf_transport_ops *ops) +int nvmf_register_transport(struct nvmf_transport_ops *ops) { + if (!ops->create_ctrl) + return -EINVAL; + mutex_lock(&nvmf_transports_mutex); list_add_tail(&ops->entry, &nvmf_transports); mutex_unlock(&nvmf_transports_mutex); + + return 0; } EXPORT_SYMBOL_GPL(nvmf_register_transport); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 924145c979f1..156018182ce4 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); -void nvmf_register_transport(struct nvmf_transport_ops *ops); +int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fb51a8de9b29..9690beb15e69 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, /* sanity checks */ - /* FC-NVME supports 64-byte SQE only */ - if (ctrl->ctrl.ioccsz != 4) { - dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n", - ctrl->ctrl.ioccsz); - goto out_remove_admin_queue; - } - /* FC-NVME supports 16-byte CQE only */ - if (ctrl->ctrl.iorcsz != 1) { - dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n", - ctrl->ctrl.iorcsz); - goto out_remove_admin_queue; - } /* FC-NVME does not have other data in the capsule */ if (ctrl->ctrl.icdoff) { dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", @@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void) if (!nvme_fc_wq) return -ENOMEM; - nvmf_register_transport(&nvme_fc_transport); - return 0; + return nvmf_register_transport(&nvme_fc_transport); } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 14cfc6f7facb..a3da1e90b99d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -78,6 +78,11 @@ enum nvme_quirks { * readiness, which is done by reading the NVME_CSTS_RDY bit. */ NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3), + + /* + * APST should not be used. + */ + NVME_QUIRK_NO_APST = (1 << 4), }; /* @@ -112,6 +117,7 @@ enum nvme_ctrl_state { struct nvme_ctrl { enum nvme_ctrl_state state; + bool identified; spinlock_t lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; @@ -147,13 +153,19 @@ struct nvme_ctrl { u32 vs; u32 sgls; u16 kas; + u8 npss; + u8 apsta; unsigned int kato; bool subsystem; unsigned long quirks; + struct nvme_id_power_state psd[32]; struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + /* Power saving configuration */ + u64 ps_max_latency_us; + /* Fabrics only */ u16 sqsize; u32 ioccsz; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ddc51adb594d..57a1af52b06e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -613,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) - ret = BLK_MQ_RQ_QUEUE_BUSY; - else - ret = BLK_MQ_RQ_QUEUE_ERROR; + ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); goto out_cleanup_iod; } @@ -1739,7 +1736,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); - kfree(dev->ctrl.opal_dev); + free_opal_dev(dev->ctrl.opal_dev); kfree(dev); } @@ -1789,14 +1786,17 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; - if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) { - dev->ctrl.opal_dev = - init_opal_dev(&dev->ctrl, &nvme_sec_submit); + if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) { + if (!dev->ctrl.opal_dev) + dev->ctrl.opal_dev = + init_opal_dev(&dev->ctrl, &nvme_sec_submit); + else if (was_suspend) + opal_unlock_from_suspend(dev->ctrl.opal_dev); + } else { + free_opal_dev(dev->ctrl.opal_dev); + dev->ctrl.opal_dev = NULL; } - if (was_suspend) - opal_unlock_from_suspend(dev->ctrl.opal_dev); - result = nvme_setup_io_queues(dev); if (result) goto out; @@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); - if (!pci_device_is_present(pdev)) + if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); + nvme_dev_disable(dev, false); + } flush_work(&dev->reset_work); nvme_uninit_ctrl(&dev->ctrl); @@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { 0, } }; MODULE_DEVICE_TABLE(pci, nvme_id_table); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a75e95d42b3f..49b2121af689 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -42,28 +42,6 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -static const char *const nvme_rdma_cm_status_strs[] = { - [NVME_RDMA_CM_INVALID_LEN] = "invalid length", - [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format", - [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID", - [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size", - [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size", - [NVME_RDMA_CM_NO_RSC] = "resource not found", - [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD", - [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD", -}; - -static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) -{ - size_t index = status; - - if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) && - nvme_rdma_cm_status_strs[index]) - return nvme_rdma_cm_status_strs[index]; - else - return "unrecognized reason"; -}; - /* * We handle AEN commands ourselves and don't even let the * block layer know about them. @@ -155,6 +133,10 @@ struct nvme_rdma_ctrl { struct sockaddr addr; struct sockaddr_in addr_in; }; + union { + struct sockaddr src_addr; + struct sockaddr_in src_addr_in; + }; struct nvme_ctrl ctrl; }; @@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, int idx, size_t queue_size) { struct nvme_rdma_queue *queue; + struct sockaddr *src_addr = NULL; int ret; queue = &ctrl->queues[idx]; @@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, } queue->cm_error = -ETIMEDOUT; - ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr, + if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) + src_addr = &ctrl->src_addr; + + ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -1905,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_free_ctrl; } + if (opts->mask & NVMF_OPT_HOST_TRADDR) { + ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, + opts->host_traddr); + if (ret) { + pr_err("malformed src IP address passed: %s\n", + opts->host_traddr); + goto out_free_ctrl; + } + } + if (opts->mask & NVMF_OPT_TRSVCID) { u16 port; @@ -2016,7 +2012,8 @@ out_free_ctrl: static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, - .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY, + .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | + NVMF_OPT_HOST_TRADDR, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2063,8 +2060,7 @@ static int __init nvme_rdma_init_module(void) return ret; } - nvmf_register_transport(&nvme_rdma_transport); - return 0; + return nvmf_register_transport(&nvme_rdma_transport); } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 95ae52390478..94e524fea568 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -41,7 +41,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { status = NVME_SC_INVALID_NS; - pr_err("nvmet : Counld not find namespace id : %d\n", + pr_err("nvmet : Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); goto out; } @@ -509,7 +509,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) break; case nvme_admin_identify: req->data_len = 4096; - switch (le32_to_cpu(cmd->identify.cns)) { + switch (cmd->identify.cns) { case NVME_ID_CNS_NS: req->execute = nvmet_execute_identify_ns; return 0; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fc5ba2f9e15f..5267ce20c12d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -17,6 +17,7 @@ #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; +static DEFINE_IDA(cntlid_ida); /* * This read/write semaphore is used to synchronize access to configuration @@ -749,7 +750,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->sqs) goto out_free_cqs; - ret = ida_simple_get(&subsys->cntlid_ida, + ret = ida_simple_get(&cntlid_ida, NVME_CNTLID_MIN, NVME_CNTLID_MAX, GFP_KERNEL); if (ret < 0) { @@ -819,7 +820,7 @@ static void nvmet_ctrl_free(struct kref *ref) flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); - ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid); + ida_simple_remove(&cntlid_ida, ctrl->cntlid); nvmet_subsys_put(subsys); kfree(ctrl->sqs); @@ -918,9 +919,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, mutex_init(&subsys->lock); INIT_LIST_HEAD(&subsys->namespaces); INIT_LIST_HEAD(&subsys->ctrls); - - ida_init(&subsys->cntlid_ida); - INIT_LIST_HEAD(&subsys->hosts); return subsys; @@ -933,7 +931,6 @@ static void nvmet_subsys_free(struct kref *ref) WARN_ON_ONCE(!list_empty(&subsys->namespaces)); - ida_destroy(&subsys->cntlid_ida); kfree(subsys->subsysnqn); kfree(subsys); } @@ -976,6 +973,7 @@ static void __exit nvmet_exit(void) { nvmet_exit_configfs(); nvmet_exit_discovery(); + ida_destroy(&cntlid_ida); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 12f39eea569f..af8aabf05335 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) } case nvme_admin_identify: req->data_len = 4096; - switch (le32_to_cpu(cmd->identify.cns)) { + switch (cmd->identify.cns) { case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_disc_ctrl; return 0; default: pr_err("nvmet: unsupported identify cns %d\n", - le32_to_cpu(cmd->identify.cns)); + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f4088198cd0d..8bd022af3df6 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -153,8 +153,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for NQN %s.\n", - ctrl->cntlid, ctrl->hostnqn); + pr_info("creating controller %d for subsystem %s for NQN %s.\n", + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: @@ -220,7 +220,7 @@ int nvmet_parse_connect_cmd(struct nvmet_req *req) req->ns = NULL; - if (req->cmd->common.opcode != nvme_fabrics_command) { + if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index ba57f9852bde..8f483ee7868c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1817,16 +1817,14 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - if (fcpreq->fcp_error || abort) - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - + nvmet_req_complete(&fod->req, fcpreq->fcp_error); return; } switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: - if (abort || fcpreq->fcp_error || + if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); @@ -1849,7 +1847,7 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: - if (abort || fcpreq->fcp_error || + if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f3862e38f574..d1f06e7768ff 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -724,8 +724,7 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - nvmf_register_transport(&nvme_loop_transport); - return 0; + return nvmf_register_transport(&nvme_loop_transport); } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index cc7ad06b43a7..1370eee0a3c0 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -142,7 +142,6 @@ struct nvmet_subsys { unsigned int max_nsid; struct list_head ctrls; - struct ida cntlid_ida; struct list_head hosts; bool allow_any_host; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 60990220bd83..9aa1da3778b3 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1041,6 +1041,9 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, { struct nvme_rdma_cm_rej rej; + pr_debug("rejecting connect request: status %d (%s)\n", + status, nvme_rdma_cm_msg(status)); + rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.sts = cpu_to_le16(status); @@ -1091,7 +1094,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); if (queue->idx < 0) { ret = NVME_RDMA_CM_NO_RSC; - goto out_free_queue; + goto out_destroy_sq; } ret = nvmet_rdma_alloc_rsps(queue); @@ -1135,7 +1138,6 @@ out_destroy_sq: out_free_queue: kfree(queue); out_reject: - pr_debug("rejecting connect request with status code %d\n", ret); nvmet_rdma_cm_reject(cm_id, ret); return NULL; } @@ -1188,7 +1190,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ndev = nvmet_rdma_find_get_device(cm_id); if (!ndev) { - pr_err("no client data!\n"); nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC); return -ECONNREFUSED; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 912fbc3b4543..3e32dc954c3c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1167,7 +1167,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, - sizeof(*cmd) - sizeof(cmd->req)); + sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size); cmd->device = dev; cmd->sense_buffer = buf; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 126a5ee00987..f94535130a34 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -227,27 +227,31 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) return 0; } + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + return -ENOMEM; + q->cmd_size = sizeof(struct scsi_request); + if (rphy) { - q = blk_init_queue(sas_non_host_smp_request, NULL); + q->request_fn = sas_non_host_smp_request; dev = &rphy->dev; name = dev_name(dev); release = NULL; } else { - q = blk_init_queue(sas_host_smp_request, NULL); + q->request_fn = sas_host_smp_request; dev = &shost->shost_gendev; snprintf(namebuf, sizeof(namebuf), "sas_host%d", shost->host_no); name = namebuf; release = sas_host_release; } - if (!q) - return -ENOMEM; + error = blk_init_allocated_queue(q); + if (error) + goto out_cleanup_queue; error = bsg_register_queue(q, dev, name, release); - if (error) { - blk_cleanup_queue(q); - return -ENOMEM; - } + if (error) + goto out_cleanup_queue; if (rphy) rphy->q = q; @@ -261,6 +265,10 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); return 0; + +out_cleanup_queue: + blk_cleanup_queue(q); + return error; } static void sas_bsg_remove(struct Scsi_Host *shost, struct sas_rphy *rphy) diff --git a/fs/block_dev.c b/fs/block_dev.c index 73031ec54a7b..1c62845a72c7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1043,13 +1043,22 @@ static struct block_device *bd_acquire(struct inode *inode) spin_lock(&bdev_lock); bdev = inode->i_bdev; - if (bdev) { + if (bdev && !inode_unhashed(bdev->bd_inode)) { bdgrab(bdev); spin_unlock(&bdev_lock); return bdev; } spin_unlock(&bdev_lock); + /* + * i_bdev references block device inode that was already shut down + * (corresponding device got removed). Remove the reference and look + * up block device inode again just in case new device got + * reestablished under the same device number. + */ + if (bdev) + bd_forget(inode); + bdev = bdget(inode->i_rdev); if (bdev) { spin_lock(&bdev_lock); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8e4df3d6c8cd..001d30d727c5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,6 +33,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; + wait_queue_t dispatch_wait; atomic_t wait_index; struct blk_mq_tags *tags; @@ -160,6 +161,7 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, + BLK_MQ_S_TAG_WAITING = 3, BLK_MQ_MAX_DEPTH = 10240, diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index bf240a3cbf99..a72fd04aa5e1 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -29,6 +29,30 @@ enum nvme_rdma_cm_status { NVME_RDMA_CM_INVALID_ORD = 0x08, }; +static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) +{ + switch (status) { + case NVME_RDMA_CM_INVALID_LEN: + return "invalid length"; + case NVME_RDMA_CM_INVALID_RECFMT: + return "invalid record format"; + case NVME_RDMA_CM_INVALID_QID: + return "invalid queue ID"; + case NVME_RDMA_CM_INVALID_HSQSIZE: + return "invalid host SQ size"; + case NVME_RDMA_CM_INVALID_HRQSIZE: + return "invalid host RQ size"; + case NVME_RDMA_CM_NO_RSC: + return "resource not found"; + case NVME_RDMA_CM_INVALID_IRD: + return "invalid IRD"; + case NVME_RDMA_CM_INVALID_ORD: + return "Invalid ORD"; + default: + return "unrecognized reason"; + } +} + /** * struct nvme_rdma_cm_req - rdma connect request * diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0b676a02cf3e..c43d435d4225 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd { __le16 appmask; }; +/* Features */ + +struct nvme_feat_auto_pst { + __le64 entries[32]; +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -644,7 +650,9 @@ struct nvme_identify { __le32 nsid; __u64 rsvd2[2]; union nvme_data_ptr dptr; - __le32 cns; + __u8 cns; + __u8 rsvd3; + __le16 ctrlid; __u32 rsvd11[5]; }; diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index deee23d012e7..04b124fca51e 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); #ifdef CONFIG_BLK_SED_OPAL +void free_opal_dev(struct opal_dev *dev); bool opal_unlock_from_suspend(struct opal_dev *dev); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); @@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd) return false; } #else +static inline void free_opal_dev(struct opal_dev *dev) +{ +} + static inline bool is_sed_ioctl(unsigned int cmd) { return false;