Merge branch 'for-4.6/drivers' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "This is the block driver pull request for this merge window.  It sits
  on top of for-4.6/core, that was just sent out.

  This contains:

   - A set of fixes for lightnvm.  One from Alan, fixing an overflow,
     and the rest from the usual suspects, Javier and Matias.

   - A set of fixes for nbd from Markus and Dan, and a fixup from Arnd
     for correct usage of the signed 64-bit divider.

   - A set of bug fixes for the Micron mtip32xx, from Asai.

   - A fix for the brd discard handling from Bart.

   - Update the maintainers entry for cciss, since that hardware has
     transferred ownership.

   - Three bug fixes for bcache from Eric Wheeler.

   - Set of fixes for xen-blk{back,front} from Jan and Konrad.

   - Removal of the cpqarray driver.  It has been disabled in Kconfig
     since 2013, and we were initially scheduled to remove it in 3.15.

   - Various updates and fixes for NVMe, with the most important being:

        - Removal of the per-device NVMe thread, replacing that with a
          watchdog timer instead. From Christoph.

        - Exposing the namespace WWID through sysfs, from Keith.

        - Set of cleanups from Ming Lin.

        - Logging the controller device name instead of the underlying
          PCI device name, from Sagi.

        - And a bunch of fixes and optimizations from the usual suspects
          in this area"

* 'for-4.6/drivers' of git://git.kernel.dk/linux-block: (49 commits)
  NVMe: Expose ns wwid through single sysfs entry
  drivers:block: cpqarray clean up
  brd: Fix discard request processing
  cpqarray: remove it from the kernel
  cciss: update MAINTAINERS
  NVMe: Remove unused sq_head read in completion path
  bcache: fix cache_set_flush() NULL pointer dereference on OOM
  bcache: cleaned up error handling around register_cache()
  bcache: fix race of writeback thread starting before complete initialization
  NVMe: Create discard zero quirk white list
  nbd: use correct div_s64 helper
  mtip32xx: remove unneeded variable in mtip_cmd_timeout()
  lightnvm: generalize rrpc ppa calculations
  lightnvm: remove struct nvm_dev->total_blocks
  lightnvm: rename ->nr_pages to ->nr_sects
  lightnvm: update closed list outside of intr context
  xen/blback: Fit the important information of the thread in 17 characters
  lightnvm: fold get bb tbl when using dual/quad plane mode
  lightnvm: fix up nonsensical configure overrun checking
  xen-blkback: advertise indirect segment support earlier
  ...
This commit is contained in:
Linus Torvalds 2016-03-18 17:13:31 -07:00
commit 237045fc3c
26 changed files with 792 additions and 3009 deletions

View File

@ -1,93 +0,0 @@
This driver is for Compaq's SMART2 Intelligent Disk Array Controllers.
Supported Cards:
----------------
This driver is known to work with the following cards:
* SMART (EISA)
* SMART-2/E (EISA)
* SMART-2/P
* SMART-2DH
* SMART-2SL
* SMART-221
* SMART-3100ES
* SMART-3200
* Integrated Smart Array Controller
* SA 4200
* SA 4250ES
* SA 431
* RAID LC2 Controller
It should also work with some really old Disk array adapters, but I am
unable to test against these cards:
* IDA
* IDA-2
* IAES
EISA Controllers:
-----------------
If you want to use an EISA controller you'll have to supply some
modprobe/lilo parameters. If the driver is compiled into the kernel, must
give it the controller's IO port address at boot time (it is not
necessary to specify the IRQ). For example, if you had two SMART-2/E
controllers, in EISA slots 1 and 2 you'd give it a boot argument like
this:
smart2=0x1000,0x2000
If you were loading the driver as a module, you'd give load it like this:
modprobe cpqarray eisa=0x1000,0x2000
You can use EISA and PCI adapters at the same time.
Device Naming:
--------------
You need some entries in /dev for the ida device. MAKEDEV in the /dev
directory can make device nodes for you automatically. The device setup is
as follows:
Major numbers:
72 ida0
73 ida1
74 ida2
75 ida3
76 ida4
77 ida5
78 ida6
79 ida7
Minor numbers:
b7 b6 b5 b4 b3 b2 b1 b0
|----+----| |----+----|
| |
| +-------- Partition ID (0=wholedev, 1-15 partition)
|
+-------------------- Logical Volume number
The device naming scheme is:
/dev/ida/c0d0 Controller 0, disk 0, whole device
/dev/ida/c0d0p1 Controller 0, disk 0, partition 1
/dev/ida/c0d0p2 Controller 0, disk 0, partition 2
/dev/ida/c0d0p3 Controller 0, disk 0, partition 3
/dev/ida/c1d1 Controller 1, disk 1, whole device
/dev/ida/c1d1p1 Controller 1, disk 1, partition 1
/dev/ida/c1d1p2 Controller 1, disk 1, partition 2
/dev/ida/c1d1p3 Controller 1, disk 1, partition 3
Changelog:
==========
10-28-2004 : General cleanup, syntax fixes for in-kernel driver version.
James Nelson <james4765@gmail.com>
1999 : Original Document

View File

@ -5016,12 +5016,6 @@ T: git git://linuxtv.org/anttip/media_tree.git
S: Maintained
F: drivers/media/dvb-frontends/hd29l2*
HEWLETT-PACKARD SMART2 RAID DRIVER
L: iss_storagedev@hp.com
S: Orphan
F: Documentation/blockdev/cpqarray.txt
F: drivers/block/cpqarray.*
HEWLETT-PACKARD SMART ARRAY RAID DRIVER (hpsa)
M: Don Brace <don.brace@microsemi.com>
L: iss_storagedev@hp.com
@ -5034,9 +5028,9 @@ F: include/linux/cciss*.h
F: include/uapi/linux/cciss*.h
HEWLETT-PACKARD SMART CISS RAID DRIVER (cciss)
M: Don Brace <don.brace@pmcs.com>
M: Don Brace <don.brace@microsemi.com>
L: iss_storagedev@hp.com
L: storagedev@pmcs.com
L: esc.storagedev@microsemi.com
L: linux-scsi@vger.kernel.org
S: Supported
F: Documentation/blockdev/cciss.txt

View File

@ -110,16 +110,6 @@ source "drivers/block/mtip32xx/Kconfig"
source "drivers/block/zram/Kconfig"
config BLK_CPQ_DA
tristate "Compaq SMART2 support"
depends on PCI && VIRT_TO_BUS && 0
help
This is the driver for Compaq Smart Array controllers. Everyone
using these boards should say Y here. See the file
<file:Documentation/blockdev/cpqarray.txt> for the current list of
boards supported by this driver, and for further information on the
use of this driver.
config BLK_CPQ_CISS_DA
tristate "Compaq Smart Array 5xxx support"
depends on PCI

View File

@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
obj-$(CONFIG_XILINX_SYSACE) += xsysace.o

View File

@ -341,7 +341,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
bio->bi_iter.bi_size & PAGE_MASK)
bio->bi_iter.bi_size & ~PAGE_MASK)
goto io_error;
discard_from_brd(brd, sector, bio->bi_iter.bi_size);
goto out;

File diff suppressed because it is too large Load Diff

View File

@ -1,126 +0,0 @@
/*
* Disk Array driver for Compaq SMART2 Controllers
* Copyright 1998 Compaq Computer Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Questions/Comments/Bugfixes to iss_storagedev@hp.com
*
* If you want to make changes, improve or add functionality to this
* driver, you'll probably need the Compaq Array Controller Interface
* Specificiation (Document number ECG086/1198)
*/
#ifndef CPQARRAY_H
#define CPQARRAY_H
#ifdef __KERNEL__
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/timer.h>
#endif
#include "ida_cmd.h"
#define IO_OK 0
#define IO_ERROR 1
#define NWD 16
#define NWD_SHIFT 4
#define IDA_TIMER (5*HZ)
#define IDA_TIMEOUT (10*HZ)
#define MISC_NONFATAL_WARN 0x01
typedef struct {
unsigned blk_size;
unsigned nr_blks;
unsigned cylinders;
unsigned heads;
unsigned sectors;
int usage_count;
} drv_info_t;
#ifdef __KERNEL__
struct ctlr_info;
typedef struct ctlr_info ctlr_info_t;
struct access_method {
void (*submit_command)(ctlr_info_t *h, cmdlist_t *c);
void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
unsigned long (*fifo_full)(ctlr_info_t *h);
unsigned long (*intr_pending)(ctlr_info_t *h);
unsigned long (*command_completed)(ctlr_info_t *h);
};
struct board_type {
__u32 board_id;
char *product_name;
struct access_method *access;
};
struct ctlr_info {
int ctlr;
char devname[8];
__u32 log_drv_map;
__u32 drv_assign_map;
__u32 drv_spare_map;
__u32 mp_failed_drv_map;
char firm_rev[4];
int ctlr_sig;
int log_drives;
int phys_drives;
struct pci_dev *pci_dev; /* NULL if EISA */
__u32 board_id;
char *product_name;
void __iomem *vaddr;
unsigned long paddr;
unsigned long io_mem_addr;
unsigned long io_mem_length;
int intr;
int usage_count;
drv_info_t drv[NWD];
struct proc_dir_entry *proc;
struct access_method access;
cmdlist_t *reqQ;
cmdlist_t *cmpQ;
cmdlist_t *cmd_pool;
dma_addr_t cmd_pool_dhandle;
unsigned long *cmd_pool_bits;
struct request_queue *queue;
spinlock_t lock;
unsigned int Qdepth;
unsigned int maxQsinceinit;
unsigned int nr_requests;
unsigned int nr_allocs;
unsigned int nr_frees;
struct timer_list timer;
unsigned int misc_tflags;
};
#define IDA_LOCK(i) (&hba[i]->lock)
#endif
#endif /* CPQARRAY_H */

View File

@ -1,349 +0,0 @@
/*
* Disk Array driver for Compaq SMART2 Controllers
* Copyright 1998 Compaq Computer Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Questions/Comments/Bugfixes to iss_storagedev@hp.com
*
*/
#ifndef ARRAYCMD_H
#define ARRAYCMD_H
#include <asm/types.h>
#if 0
#include <linux/blkdev.h>
#endif
/* for the Smart Array 42XX cards */
#define S42XX_REQUEST_PORT_OFFSET 0x40
#define S42XX_REPLY_INTR_MASK_OFFSET 0x34
#define S42XX_REPLY_PORT_OFFSET 0x44
#define S42XX_INTR_STATUS 0x30
#define S42XX_INTR_OFF 0x08
#define S42XX_INTR_PENDING 0x08
#define COMMAND_FIFO 0x04
#define COMMAND_COMPLETE_FIFO 0x08
#define INTR_MASK 0x0C
#define INTR_STATUS 0x10
#define INTR_PENDING 0x14
#define FIFO_NOT_EMPTY 0x01
#define FIFO_NOT_FULL 0x02
#define BIG_PROBLEM 0x40
#define LOG_NOT_CONF 2
#pragma pack(1)
typedef struct {
__u32 size;
__u32 addr;
} sg_t;
#define RCODE_NONFATAL 0x02
#define RCODE_FATAL 0x04
#define RCODE_INVREQ 0x10
typedef struct {
__u16 next;
__u8 cmd;
__u8 rcode;
__u32 blk;
__u16 blk_cnt;
__u8 sg_cnt;
__u8 reserved;
} rhdr_t;
#define SG_MAX 32
typedef struct {
rhdr_t hdr;
sg_t sg[SG_MAX];
__u32 bp;
} rblk_t;
typedef struct {
__u8 unit;
__u8 prio;
__u16 size;
} chdr_t;
#define CMD_RWREQ 0x00
#define CMD_IOCTL_PEND 0x01
#define CMD_IOCTL_DONE 0x02
typedef struct cmdlist {
chdr_t hdr;
rblk_t req;
__u32 size;
int retry_cnt;
__u32 busaddr;
int ctlr;
struct cmdlist *prev;
struct cmdlist *next;
struct request *rq;
int type;
} cmdlist_t;
#define ID_CTLR 0x11
typedef struct {
__u8 nr_drvs;
__u32 cfg_sig;
__u8 firm_rev[4];
__u8 rom_rev[4];
__u8 hw_rev;
__u32 bb_rev;
__u32 drv_present_map;
__u32 ext_drv_map;
__u32 board_id;
__u8 cfg_error;
__u32 non_disk_bits;
__u8 bad_ram_addr;
__u8 cpu_rev;
__u8 pdpi_rev;
__u8 epic_rev;
__u8 wcxc_rev;
__u8 marketing_rev;
__u8 ctlr_flags;
__u8 host_flags;
__u8 expand_dis;
__u8 scsi_chips;
__u32 max_req_blocks;
__u32 ctlr_clock;
__u8 drvs_per_bus;
__u16 big_drv_present_map[8];
__u16 big_ext_drv_map[8];
__u16 big_non_disk_map[8];
__u16 task_flags;
__u8 icl_bus;
__u8 red_modes;
__u8 cur_red_mode;
__u8 red_ctlr_stat;
__u8 red_fail_reason;
__u8 reserved[403];
} id_ctlr_t;
typedef struct {
__u16 cyl;
__u8 heads;
__u8 xsig;
__u8 psectors;
__u16 wpre;
__u8 maxecc;
__u8 drv_ctrl;
__u16 pcyls;
__u8 pheads;
__u16 landz;
__u8 sect_per_track;
__u8 cksum;
} drv_param_t;
#define ID_LOG_DRV 0x10
typedef struct {
__u16 blk_size;
__u32 nr_blks;
drv_param_t drv;
__u8 fault_tol;
__u8 reserved;
__u8 bios_disable;
} id_log_drv_t;
#define ID_LOG_DRV_EXT 0x18
typedef struct {
__u32 log_drv_id;
__u8 log_drv_label[64];
__u8 reserved[418];
} id_log_drv_ext_t;
#define SENSE_LOG_DRV_STAT 0x12
typedef struct {
__u8 status;
__u32 fail_map;
__u16 read_err[32];
__u16 write_err[32];
__u8 drv_err_data[256];
__u8 drq_timeout[32];
__u32 blks_to_recover;
__u8 drv_recovering;
__u16 remap_cnt[32];
__u32 replace_drv_map;
__u32 act_spare_map;
__u8 spare_stat;
__u8 spare_repl_map[32];
__u32 repl_ok_map;
__u8 media_exch;
__u8 cache_fail;
__u8 expn_fail;
__u8 unit_flags;
__u16 big_fail_map[8];
__u16 big_remap_map[128];
__u16 big_repl_map[8];
__u16 big_act_spare_map[8];
__u8 big_spar_repl_map[128];
__u16 big_repl_ok_map[8];
__u8 big_drv_rebuild;
__u8 reserved[36];
} sense_log_drv_stat_t;
#define START_RECOVER 0x13
#define ID_PHYS_DRV 0x15
typedef struct {
__u8 scsi_bus;
__u8 scsi_id;
__u16 blk_size;
__u32 nr_blks;
__u32 rsvd_blks;
__u8 drv_model[40];
__u8 drv_sn[40];
__u8 drv_fw[8];
__u8 scsi_iq_bits;
__u8 compaq_drv_stmp;
__u8 last_fail;
__u8 phys_drv_flags;
__u8 phys_drv_flags1;
__u8 scsi_lun;
__u8 phys_drv_flags2;
__u8 reserved;
__u32 spi_speed_rules;
__u8 phys_connector[2];
__u8 phys_box_on_bus;
__u8 phys_bay_in_box;
} id_phys_drv_t;
#define BLINK_DRV_LEDS 0x16
typedef struct {
__u32 blink_duration;
__u32 reserved;
__u8 blink[256];
__u8 reserved1[248];
} blink_drv_leds_t;
#define SENSE_BLINK_LEDS 0x17
typedef struct {
__u32 blink_duration;
__u32 btime_elap;
__u8 blink[256];
__u8 reserved1[248];
} sense_blink_leds_t;
#define IDA_READ 0x20
#define IDA_WRITE 0x30
#define IDA_WRITE_MEDIA 0x31
#define RESET_TO_DIAG 0x40
#define DIAG_PASS_THRU 0x41
#define SENSE_CONFIG 0x50
#define SET_CONFIG 0x51
typedef struct {
__u32 cfg_sig;
__u16 compat_port;
__u8 data_dist_mode;
__u8 surf_an_ctrl;
__u16 ctlr_phys_drv;
__u16 log_unit_phys_drv;
__u16 fault_tol_mode;
__u8 phys_drv_param[16];
drv_param_t drv;
__u32 drv_asgn_map;
__u16 dist_factor;
__u32 spare_asgn_map;
__u8 reserved[6];
__u16 os;
__u8 ctlr_order;
__u8 extra_info;
__u32 data_offs;
__u8 parity_backedout_write_drvs;
__u8 parity_dist_mode;
__u8 parity_shift_fact;
__u8 bios_disable_flag;
__u32 blks_on_vol;
__u32 blks_per_drv;
__u8 scratch[16];
__u16 big_drv_map[8];
__u16 big_spare_map[8];
__u8 ss_source_vol;
__u8 mix_drv_cap_range;
struct {
__u16 big_drv_map[8];
__u32 blks_per_drv;
__u16 fault_tol_mode;
__u16 dist_factor;
} MDC_range[4];
__u8 reserved1[248];
} config_t;
#define BYPASS_VOL_STATE 0x52
#define SS_CREATE_VOL 0x53
#define CHANGE_CONFIG 0x54
#define SENSE_ORIG_CONF 0x55
#define REORDER_LOG_DRV 0x56
typedef struct {
__u8 old_units[32];
} reorder_log_drv_t;
#define LABEL_LOG_DRV 0x57
typedef struct {
__u8 log_drv_label[64];
} label_log_drv_t;
#define SS_TO_VOL 0x58
#define SET_SURF_DELAY 0x60
typedef struct {
__u16 delay;
__u8 reserved[510];
} surf_delay_t;
#define SET_OVERHEAT_DELAY 0x61
typedef struct {
__u16 delay;
} overhead_delay_t;
#define SET_MP_DELAY
typedef struct {
__u16 delay;
__u8 reserved[510];
} mp_delay_t;
#define PASSTHRU_A 0x91
typedef struct {
__u8 target;
__u8 bus;
__u8 lun;
__u32 timeout;
__u32 flags;
__u8 status;
__u8 error;
__u8 cdb_len;
__u8 sense_error;
__u8 sense_key;
__u32 sense_info;
__u8 sense_code;
__u8 sense_qual;
__u32 residual;
__u8 reserved[4];
__u8 cdb[12];
} scsi_param_t;
#define RESUME_BACKGROUND_ACTIVITY 0x99
#define SENSE_CONTROLLER_PERFORMANCE 0xa8
#define FLUSH_CACHE 0xc2
#define COLLECT_BUFFER 0xd2
#define READ_FLASH_ROM 0xf6
#define WRITE_FLASH_ROM 0xf7
#pragma pack()
#endif /* ARRAYCMD_H */

View File

@ -1,87 +0,0 @@
/*
* Disk Array driver for Compaq SMART2 Controllers
* Copyright 1998 Compaq Computer Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Questions/Comments/Bugfixes to iss_storagedev@hp.com
*
*/
#ifndef IDA_IOCTL_H
#define IDA_IOCTL_H
#include "ida_cmd.h"
#include "cpqarray.h"
#define IDAGETDRVINFO 0x27272828
#define IDAPASSTHRU 0x28282929
#define IDAGETCTLRSIG 0x29293030
#define IDAREVALIDATEVOLS 0x30303131
#define IDADRIVERVERSION 0x31313232
#define IDAGETPCIINFO 0x32323333
typedef struct _ida_pci_info_struct
{
unsigned char bus;
unsigned char dev_fn;
__u32 board_id;
} ida_pci_info_struct;
/*
* Normally, the ioctl determines the logical unit for this command by
* the major,minor number of the fd passed to ioctl. If you need to send
* a command to a different/nonexistant unit (such as during config), you
* can override the normal behavior by setting the unit valid bit. (Normally,
* it should be zero) The controller the command is sent to is still
* determined by the major number of the open device.
*/
#define UNITVALID 0x80
typedef struct {
__u8 cmd;
__u8 rcode;
__u8 unit;
__u32 blk;
__u16 blk_cnt;
/* currently, sg_cnt is assumed to be 1: only the 0th element of sg is used */
struct {
void __user *addr;
size_t size;
} sg[SG_MAX];
int sg_cnt;
union ctlr_cmds {
drv_info_t drv;
unsigned char buf[1024];
id_ctlr_t id_ctlr;
drv_param_t drv_param;
id_log_drv_t id_log_drv;
id_log_drv_ext_t id_log_drv_ext;
sense_log_drv_stat_t sense_log_drv_stat;
id_phys_drv_t id_phys_drv;
blink_drv_leds_t blink_drv_leds;
sense_blink_leds_t sense_blink_leds;
config_t config;
reorder_log_drv_t reorder_log_drv;
label_log_drv_t label_log_drv;
surf_delay_t surf_delay;
overhead_delay_t overhead_delay;
mp_delay_t mp_delay;
scsi_param_t scsi_param;
} c;
} ida_ioctl_t;
#endif /* IDA_IOCTL_H */

View File

@ -173,7 +173,13 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
{
struct request *rq;
if (mtip_check_surprise_removal(dd->pdev))
return NULL;
rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
if (IS_ERR(rq))
return NULL;
return blk_mq_rq_to_pdu(rq);
}
@ -233,15 +239,9 @@ static void mtip_async_complete(struct mtip_port *port,
"Command tag %d failed due to TFE\n", tag);
}
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction);
rq = mtip_rq_from_tag(dd, tag);
if (unlikely(cmd->unaligned))
up(&port->cmd_slot_unal);
blk_mq_end_request(rq, status ? -EIO : 0);
blk_mq_complete_request(rq, status);
}
/*
@ -581,6 +581,8 @@ static void mtip_completion(struct mtip_port *port,
dev_warn(&port->dd->pdev->dev,
"Internal command %d completed with TFE\n", tag);
command->comp_func = NULL;
command->comp_data = NULL;
complete(waiting);
}
@ -618,8 +620,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
port = dd->port;
set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
@ -628,7 +628,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
cmd->comp_func(port, MTIP_TAG_INTERNAL,
cmd, PORT_IRQ_TF_ERR);
}
goto handle_tfe_exit;
return;
}
/* clear the tag accumulator */
@ -701,7 +701,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
fail_reason = "thermal shutdown";
}
if (buf[288] == 0xBF) {
set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
dev_info(&dd->pdev->dev,
"Drive indicates rebuild has failed. Secure erase required.\n");
fail_all_ncq_cmds = 1;
@ -771,11 +771,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
}
}
print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
handle_tfe_exit:
/* clear eh_active */
clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
}
/*
@ -1007,6 +1002,7 @@ static bool mtip_pause_ncq(struct mtip_port *port,
(fis->features == 0x27 || fis->features == 0x72 ||
fis->features == 0x62 || fis->features == 0x26))) {
clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag);
/* Com reset after secure erase or lowlevel format */
mtip_restart_port(port);
clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
@ -1021,12 +1017,14 @@ static bool mtip_pause_ncq(struct mtip_port *port,
*
* @port Pointer to port data structure
* @timeout Max duration to wait (ms)
* @atomic gfp_t flag to indicate blockable context or not
*
* return value
* 0 Success
* -EBUSY Commands still active
*/
static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout,
gfp_t atomic)
{
unsigned long to;
unsigned int n;
@ -1037,16 +1035,21 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
to = jiffies + msecs_to_jiffies(timeout);
do {
if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) &&
atomic == GFP_KERNEL) {
msleep(20);
continue; /* svc thd is actively issuing commands */
}
msleep(100);
if (atomic == GFP_KERNEL)
msleep(100);
else {
cpu_relax();
udelay(100);
}
if (mtip_check_surprise_removal(port->dd->pdev))
goto err_fault;
if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
goto err_fault;
/*
* Ignore s_active bit 0 of array element 0.
@ -1099,6 +1102,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
struct mtip_cmd *int_cmd;
struct driver_data *dd = port->dd;
int rv = 0;
unsigned long start;
/* Make sure the buffer is 8 byte aligned. This is asic specific. */
if (buffer & 0x00000007) {
@ -1107,6 +1111,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
}
int_cmd = mtip_get_int_command(dd);
if (!int_cmd) {
dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
return -EFAULT;
}
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
@ -1119,7 +1127,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
if (fis->command != ATA_CMD_STANDBYNOW1) {
/* wait for io to complete if non atomic */
if (mtip_quiesce_io(port,
MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) {
MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) {
dev_warn(&dd->pdev->dev,
"Failed to quiesce IO\n");
mtip_put_int_command(dd, int_cmd);
@ -1162,6 +1170,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
/* Populate the command header */
int_cmd->command_header->byte_count = 0;
start = jiffies;
/* Issue the command to the hardware */
mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
@ -1170,10 +1180,12 @@ static int mtip_exec_internal_command(struct mtip_port *port,
if ((rv = wait_for_completion_interruptible_timeout(
&wait,
msecs_to_jiffies(timeout))) <= 0) {
if (rv == -ERESTARTSYS) { /* interrupted */
dev_err(&dd->pdev->dev,
"Internal command [%02X] was interrupted after %lu ms\n",
fis->command, timeout);
"Internal command [%02X] was interrupted after %u ms\n",
fis->command,
jiffies_to_msecs(jiffies - start));
rv = -EINTR;
goto exec_ic_exit;
} else if (rv == 0) /* timeout */
@ -2890,6 +2902,42 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
return -EFAULT;
}
static void mtip_softirq_done_fn(struct request *rq)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
struct driver_data *dd = rq->q->queuedata;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents,
cmd->direction);
if (unlikely(cmd->unaligned))
up(&dd->port->cmd_slot_unal);
blk_mq_end_request(rq, rq->errors);
}
static void mtip_abort_cmd(struct request *req, void *data,
bool reserved)
{
struct driver_data *dd = data;
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
req->errors = -EIO;
mtip_softirq_done_fn(req);
}
static void mtip_queue_cmd(struct request *req, void *data,
bool reserved)
{
struct driver_data *dd = data;
set_bit(req->tag, dd->port->cmds_to_issue);
blk_abort_request(req);
}
/*
* service thread to issue queued commands
*
@ -2902,7 +2950,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
static int mtip_service_thread(void *data)
{
struct driver_data *dd = (struct driver_data *)data;
unsigned long slot, slot_start, slot_wrap;
unsigned long slot, slot_start, slot_wrap, to;
unsigned int num_cmd_slots = dd->slot_groups * 32;
struct mtip_port *port = dd->port;
@ -2917,9 +2965,7 @@ static int mtip_service_thread(void *data)
* is in progress nor error handling is active
*/
wait_event_interruptible(port->svc_wait, (port->flags) &&
!(port->flags & MTIP_PF_PAUSE_IO));
set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
(port->flags & MTIP_PF_SVC_THD_WORK));
if (kthread_should_stop() ||
test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
@ -2929,6 +2975,8 @@ static int mtip_service_thread(void *data)
&dd->dd_flag)))
goto st_out;
set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
restart_eh:
/* Demux bits: start with error handling */
if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) {
@ -2939,6 +2987,32 @@ static int mtip_service_thread(void *data)
if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags))
goto restart_eh;
if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) {
to = jiffies + msecs_to_jiffies(5000);
do {
mdelay(100);
} while (atomic_read(&dd->irq_workers_active) != 0 &&
time_before(jiffies, to));
if (atomic_read(&dd->irq_workers_active) != 0)
dev_warn(&dd->pdev->dev,
"Completion workers still active!");
spin_lock(dd->queue->queue_lock);
blk_mq_all_tag_busy_iter(*dd->tags.tags,
mtip_queue_cmd, dd);
spin_unlock(dd->queue->queue_lock);
set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
if (mtip_device_reset(dd))
blk_mq_all_tag_busy_iter(*dd->tags.tags,
mtip_abort_cmd, dd);
clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
}
if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
slot = 1;
/* used to restrict the loop to one iteration */
@ -2971,10 +3045,8 @@ static int mtip_service_thread(void *data)
}
if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
if (mtip_ftl_rebuild_poll(dd) < 0)
set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
&dd->dd_flag);
clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
if (mtip_ftl_rebuild_poll(dd) == 0)
clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
}
}
@ -3089,7 +3161,7 @@ static int mtip_hw_get_identify(struct driver_data *dd)
if (buf[288] == 0xBF) {
dev_info(&dd->pdev->dev,
"Drive indicates rebuild has failed.\n");
/* TODO */
set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
}
}
@ -3263,20 +3335,25 @@ static int mtip_hw_init(struct driver_data *dd)
return rv;
}
static void mtip_standby_drive(struct driver_data *dd)
static int mtip_standby_drive(struct driver_data *dd)
{
if (dd->sr)
return;
int rv = 0;
if (dd->sr || !dd->port)
return -ENODEV;
/*
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
!test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
if (mtip_standby_immediate(dd->port))
!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) &&
!test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) {
rv = mtip_standby_immediate(dd->port);
if (rv)
dev_warn(&dd->pdev->dev,
"STANDBY IMMEDIATE failed\n");
}
return rv;
}
/*
@ -3289,10 +3366,6 @@ static void mtip_standby_drive(struct driver_data *dd)
*/
static int mtip_hw_exit(struct driver_data *dd)
{
/*
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
if (!dd->sr) {
/* de-initialize the port. */
mtip_deinit_port(dd->port);
@ -3334,8 +3407,7 @@ static int mtip_hw_shutdown(struct driver_data *dd)
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
if (!dd->sr && dd->port)
mtip_standby_immediate(dd->port);
mtip_standby_drive(dd);
return 0;
}
@ -3358,7 +3430,7 @@ static int mtip_hw_suspend(struct driver_data *dd)
* Send standby immediate (E0h) to the drive
* so that it saves its state.
*/
if (mtip_standby_immediate(dd->port) != 0) {
if (mtip_standby_drive(dd) != 0) {
dev_err(&dd->pdev->dev,
"Failed standby-immediate command\n");
return -EFAULT;
@ -3596,6 +3668,28 @@ static int mtip_block_getgeo(struct block_device *dev,
return 0;
}
static int mtip_block_open(struct block_device *dev, fmode_t mode)
{
struct driver_data *dd;
if (dev && dev->bd_disk) {
dd = (struct driver_data *) dev->bd_disk->private_data;
if (dd) {
if (test_bit(MTIP_DDF_REMOVAL_BIT,
&dd->dd_flag)) {
return -ENODEV;
}
return 0;
}
}
return -ENODEV;
}
void mtip_block_release(struct gendisk *disk, fmode_t mode)
{
}
/*
* Block device operation function.
*
@ -3603,6 +3697,8 @@ static int mtip_block_getgeo(struct block_device *dev,
* layer.
*/
static const struct block_device_operations mtip_block_ops = {
.open = mtip_block_open,
.release = mtip_block_release,
.ioctl = mtip_block_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = mtip_block_compat_ioctl,
@ -3664,10 +3760,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
rq_data_dir(rq))) {
return -ENODATA;
}
if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)))
if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) ||
test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)))
return -ENODATA;
if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
return -ENXIO;
}
if (rq->cmd_flags & REQ_DISCARD) {
@ -3779,11 +3874,32 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx,
return 0;
}
static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
bool reserved)
{
struct driver_data *dd = req->q->queuedata;
if (reserved)
goto exit_handler;
if (test_bit(req->tag, dd->port->cmds_to_issue))
goto exit_handler;
if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags))
goto exit_handler;
wake_up_interruptible(&dd->port->svc_wait);
exit_handler:
return BLK_EH_RESET_TIMER;
}
static struct blk_mq_ops mtip_mq_ops = {
.queue_rq = mtip_queue_rq,
.map_queue = blk_mq_map_queue,
.init_request = mtip_init_cmd,
.exit_request = mtip_free_cmd,
.complete = mtip_softirq_done_fn,
.timeout = mtip_cmd_timeout,
};
/*
@ -3850,7 +3966,6 @@ static int mtip_block_initialize(struct driver_data *dd)
mtip_hw_debugfs_init(dd);
skip_create_disk:
memset(&dd->tags, 0, sizeof(dd->tags));
dd->tags.ops = &mtip_mq_ops;
dd->tags.nr_hw_queues = 1;
@ -3860,12 +3975,13 @@ static int mtip_block_initialize(struct driver_data *dd)
dd->tags.numa_node = dd->numa_node;
dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
dd->tags.driver_data = dd;
dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
rv = blk_mq_alloc_tag_set(&dd->tags);
if (rv) {
dev_err(&dd->pdev->dev,
"Unable to allocate request queue\n");
goto block_queue_alloc_init_error;
goto block_queue_alloc_tag_error;
}
/* Allocate the request queue. */
@ -3880,6 +3996,7 @@ static int mtip_block_initialize(struct driver_data *dd)
dd->disk->queue = dd->queue;
dd->queue->queuedata = dd;
skip_create_disk:
/* Initialize the protocol layer. */
wait_for_rebuild = mtip_hw_get_identify(dd);
if (wait_for_rebuild < 0) {
@ -3976,8 +4093,9 @@ static int mtip_block_initialize(struct driver_data *dd)
read_capacity_error:
init_hw_cmds_error:
blk_cleanup_queue(dd->queue);
blk_mq_free_tag_set(&dd->tags);
block_queue_alloc_init_error:
blk_mq_free_tag_set(&dd->tags);
block_queue_alloc_tag_error:
mtip_hw_debugfs_exit(dd);
disk_index_error:
spin_lock(&rssd_index_lock);
@ -3994,6 +4112,22 @@ static int mtip_block_initialize(struct driver_data *dd)
return rv;
}
static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
{
struct driver_data *dd = (struct driver_data *)data;
struct mtip_cmd *cmd;
if (likely(!reserv))
blk_mq_complete_request(rq, -ENODEV);
else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
if (cmd->comp_func)
cmd->comp_func(dd->port, MTIP_TAG_INTERNAL,
cmd, -ENODEV);
}
}
/*
* Block layer deinitialization function.
*
@ -4025,12 +4159,23 @@ static int mtip_block_remove(struct driver_data *dd)
}
}
if (!dd->sr)
mtip_standby_drive(dd);
if (!dd->sr) {
/*
* Explicitly wait here for IOs to quiesce,
* as mtip_standby_drive usually won't wait for IOs.
*/
if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS,
GFP_KERNEL))
mtip_standby_drive(dd);
}
else
dev_info(&dd->pdev->dev, "device %s surprise removal\n",
dd->disk->disk_name);
blk_mq_freeze_queue_start(dd->queue);
blk_mq_stop_hw_queues(dd->queue);
blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd);
/*
* Delete our gendisk structure. This also removes the device
* from /dev
@ -4040,7 +4185,8 @@ static int mtip_block_remove(struct driver_data *dd)
dd->bdev = NULL;
}
if (dd->disk) {
del_gendisk(dd->disk);
if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
del_gendisk(dd->disk);
if (dd->disk->queue) {
blk_cleanup_queue(dd->queue);
blk_mq_free_tag_set(&dd->tags);
@ -4081,7 +4227,8 @@ static int mtip_block_shutdown(struct driver_data *dd)
dev_info(&dd->pdev->dev,
"Shutting down %s ...\n", dd->disk->disk_name);
del_gendisk(dd->disk);
if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
del_gendisk(dd->disk);
if (dd->disk->queue) {
blk_cleanup_queue(dd->queue);
blk_mq_free_tag_set(&dd->tags);
@ -4426,7 +4573,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
struct driver_data *dd = pci_get_drvdata(pdev);
unsigned long flags, to;
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag);
spin_lock_irqsave(&dev_lock, flags);
list_del_init(&dd->online_list);
@ -4443,12 +4590,17 @@ static void mtip_pci_remove(struct pci_dev *pdev)
} while (atomic_read(&dd->irq_workers_active) != 0 &&
time_before(jiffies, to));
if (!dd->sr)
fsync_bdev(dd->bdev);
if (atomic_read(&dd->irq_workers_active) != 0) {
dev_warn(&dd->pdev->dev,
"Completion workers still active!\n");
}
blk_mq_stop_hw_queues(dd->queue);
blk_set_queue_dying(dd->queue);
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
/* Clean up the block layer. */
mtip_block_remove(dd);

View File

@ -134,16 +134,24 @@ enum {
MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */
MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */
MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */
MTIP_PF_TO_ACTIVE_BIT = 9, /* timeout handling */
MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) |
(1 << MTIP_PF_EH_ACTIVE_BIT) |
(1 << MTIP_PF_SE_ACTIVE_BIT) |
(1 << MTIP_PF_DM_ACTIVE_BIT)),
(1 << MTIP_PF_DM_ACTIVE_BIT) |
(1 << MTIP_PF_TO_ACTIVE_BIT)),
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
MTIP_PF_SVC_THD_STOP_BIT = 8,
MTIP_PF_SVC_THD_WORK = ((1 << MTIP_PF_EH_ACTIVE_BIT) |
(1 << MTIP_PF_ISSUE_CMDS_BIT) |
(1 << MTIP_PF_REBUILD_BIT) |
(1 << MTIP_PF_SVC_THD_STOP_BIT) |
(1 << MTIP_PF_TO_ACTIVE_BIT)),
/* below are bit numbers in 'dd_flag' defined in driver_data */
MTIP_DDF_SEC_LOCK_BIT = 0,
MTIP_DDF_REMOVE_PENDING_BIT = 1,
@ -153,6 +161,7 @@ enum {
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
MTIP_DDF_REBUILD_FAILED_BIT = 8,
MTIP_DDF_REMOVAL_BIT = 9,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |

View File

@ -57,10 +57,12 @@ struct nbd_device {
int blksize;
loff_t bytesize;
int xmit_timeout;
bool timedout;
bool disconnect; /* a disconnect has been requested by user */
struct timer_list timeout_timer;
spinlock_t tasks_lock;
/* protects initialization and shutdown of the socket */
spinlock_t sock_lock;
struct task_struct *task_recv;
struct task_struct *task_send;
@ -98,6 +100,11 @@ static inline struct device *nbd_to_dev(struct nbd_device *nbd)
return disk_to_dev(nbd->disk);
}
static bool nbd_is_connected(struct nbd_device *nbd)
{
return !!nbd->task_recv;
}
static const char *nbdcmd_to_ascii(int cmd)
{
switch (cmd) {
@ -110,6 +117,42 @@ static const char *nbdcmd_to_ascii(int cmd)
return "invalid";
}
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
bdev->bd_inode->i_size = 0;
set_capacity(nbd->disk, 0);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
return 0;
}
static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
if (!nbd_is_connected(nbd))
return;
bdev->bd_inode->i_size = nbd->bytesize;
set_capacity(nbd->disk, nbd->bytesize >> 9);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
int blocksize, int nr_blocks)
{
int ret;
ret = set_blocksize(bdev, blocksize);
if (ret)
return ret;
nbd->blksize = blocksize;
nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
nbd_size_update(nbd, bdev);
return 0;
}
static void nbd_end_request(struct nbd_device *nbd, struct request *req)
{
int error = req->errors ? -EIO : 0;
@ -129,13 +172,20 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
*/
static void sock_shutdown(struct nbd_device *nbd)
{
if (!nbd->sock)
spin_lock_irq(&nbd->sock_lock);
if (!nbd->sock) {
spin_unlock_irq(&nbd->sock_lock);
return;
}
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
sockfd_put(nbd->sock);
nbd->sock = NULL;
del_timer_sync(&nbd->timeout_timer);
spin_unlock_irq(&nbd->sock_lock);
del_timer(&nbd->timeout_timer);
}
static void nbd_xmit_timeout(unsigned long arg)
@ -146,19 +196,16 @@ static void nbd_xmit_timeout(unsigned long arg)
if (list_empty(&nbd->queue_head))
return;
nbd->disconnect = true;
spin_lock_irqsave(&nbd->sock_lock, flags);
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->timedout = true;
if (nbd->task_recv)
force_sig(SIGKILL, nbd->task_recv);
if (nbd->sock)
kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
if (nbd->task_send)
force_sig(SIGKILL, nbd->task_send);
spin_unlock_irqrestore(&nbd->sock_lock, flags);
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
}
/*
@ -171,7 +218,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
int result;
struct msghdr msg;
struct kvec iov;
sigset_t blocked, oldset;
unsigned long pflags = current->flags;
if (unlikely(!sock)) {
@ -181,11 +227,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
return -EINVAL;
}
/* Allow interception of SIGKILL only
* Don't allow other signals to interrupt the transmission */
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
current->flags |= PF_MEMALLOC;
do {
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
@ -212,7 +253,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
buf += result;
} while (size > 0);
sigprocmask(SIG_SETMASK, &oldset, NULL);
tsk_restore_flags(current, pflags, PF_MEMALLOC);
if (!send && nbd->xmit_timeout)
@ -402,31 +442,28 @@ static struct device_attribute pid_attr = {
.show = pid_show,
};
static int nbd_thread_recv(struct nbd_device *nbd)
static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
{
struct request *req;
int ret;
unsigned long flags;
BUG_ON(nbd->magic != NBD_MAGIC);
sk_set_memalloc(nbd->sock->sk);
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = current;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
return ret;
}
nbd_size_update(nbd, bdev);
while (1) {
req = nbd_read_stat(nbd);
if (IS_ERR(req)) {
@ -437,21 +474,11 @@ static int nbd_thread_recv(struct nbd_device *nbd)
nbd_end_request(nbd, req);
}
nbd_size_clear(nbd, bdev);
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
if (signal_pending(current)) {
ret = kernel_dequeue_signal(NULL);
dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
task_pid_nr(current), current->comm, ret);
mutex_lock(&nbd->tx_lock);
sock_shutdown(nbd);
mutex_unlock(&nbd->tx_lock);
ret = -ETIMEDOUT;
}
return ret;
}
@ -544,11 +571,8 @@ static int nbd_thread_send(void *data)
{
struct nbd_device *nbd = data;
struct request *req;
unsigned long flags;
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = current;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@ -557,17 +581,6 @@ static int nbd_thread_send(void *data)
kthread_should_stop() ||
!list_empty(&nbd->waiting_queue));
if (signal_pending(current)) {
int ret = kernel_dequeue_signal(NULL);
dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
task_pid_nr(current), current->comm, ret);
mutex_lock(&nbd->tx_lock);
sock_shutdown(nbd);
mutex_unlock(&nbd->tx_lock);
break;
}
/* extract request */
if (list_empty(&nbd->waiting_queue))
continue;
@ -582,13 +595,7 @@ static int nbd_thread_send(void *data)
nbd_handle_req(nbd, req);
}
spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = NULL;
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
/* Clear maybe pending signals */
if (signal_pending(current))
kernel_dequeue_signal(NULL);
return 0;
}
@ -618,8 +625,8 @@ static void nbd_request_handler(struct request_queue *q)
req, req->cmd_type);
if (unlikely(!nbd->sock)) {
dev_err(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
req->errors++;
nbd_end_request(nbd, req);
spin_lock_irq(q->queue_lock);
@ -636,6 +643,61 @@ static void nbd_request_handler(struct request_queue *q)
}
}
static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
{
int ret = 0;
spin_lock_irq(&nbd->sock_lock);
if (nbd->sock) {
ret = -EBUSY;
goto out;
}
nbd->sock = sock;
out:
spin_unlock_irq(&nbd->sock_lock);
return ret;
}
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
nbd->disconnect = false;
nbd->timedout = false;
nbd->blksize = 1024;
nbd->bytesize = 0;
set_capacity(nbd->disk, 0);
nbd->flags = 0;
nbd->xmit_timeout = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
del_timer_sync(&nbd->timeout_timer);
}
static void nbd_bdev_reset(struct block_device *bdev)
{
set_device_ro(bdev, false);
bdev->bd_inode->i_size = 0;
if (max_part > 0) {
blkdev_reread_part(bdev);
bdev->bd_invalidated = 1;
}
}
static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
{
if (nbd->flags & NBD_FLAG_READ_ONLY)
set_device_ro(bdev, true);
if (nbd->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (nbd->flags & NBD_FLAG_SEND_FLUSH)
blk_queue_flush(nbd->disk->queue, REQ_FLUSH);
else
blk_queue_flush(nbd->disk->queue, 0);
}
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
@ -668,48 +730,40 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return 0;
}
case NBD_CLEAR_SOCK: {
struct socket *sock = nbd->sock;
nbd->sock = NULL;
case NBD_CLEAR_SOCK:
sock_shutdown(nbd);
nbd_clear_que(nbd);
BUG_ON(!list_empty(&nbd->queue_head));
BUG_ON(!list_empty(&nbd->waiting_queue));
kill_bdev(bdev);
if (sock)
sockfd_put(sock);
return 0;
}
case NBD_SET_SOCK: {
struct socket *sock;
int err;
if (nbd->sock)
return -EBUSY;
sock = sockfd_lookup(arg, &err);
if (sock) {
nbd->sock = sock;
if (max_part > 0)
bdev->bd_invalidated = 1;
nbd->disconnect = false; /* we're connected now */
return 0;
}
return -EINVAL;
struct socket *sock = sockfd_lookup(arg, &err);
if (!sock)
return err;
err = nbd_set_socket(nbd, sock);
if (!err && max_part)
bdev->bd_invalidated = 1;
return err;
}
case NBD_SET_BLKSIZE:
nbd->blksize = arg;
nbd->bytesize &= ~(nbd->blksize-1);
bdev->bd_inode->i_size = nbd->bytesize;
set_blocksize(bdev, nbd->blksize);
set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
case NBD_SET_BLKSIZE: {
loff_t bsize = div_s64(nbd->bytesize, arg);
return nbd_size_set(nbd, bdev, arg, bsize);
}
case NBD_SET_SIZE:
nbd->bytesize = arg & ~(nbd->blksize-1);
bdev->bd_inode->i_size = nbd->bytesize;
set_blocksize(bdev, nbd->blksize);
set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
return nbd_size_set(nbd, bdev, nbd->blksize,
arg / nbd->blksize);
case NBD_SET_SIZE_BLOCKS:
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
case NBD_SET_TIMEOUT:
nbd->xmit_timeout = arg * HZ;
@ -725,16 +779,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
nbd->flags = arg;
return 0;
case NBD_SET_SIZE_BLOCKS:
nbd->bytesize = ((u64) arg) * nbd->blksize;
bdev->bd_inode->i_size = nbd->bytesize;
set_blocksize(bdev, nbd->blksize);
set_capacity(nbd->disk, nbd->bytesize >> 9);
return 0;
case NBD_DO_IT: {
struct task_struct *thread;
struct socket *sock;
int error;
if (nbd->task_recv)
@ -744,15 +790,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
mutex_unlock(&nbd->tx_lock);
if (nbd->flags & NBD_FLAG_READ_ONLY)
set_device_ro(bdev, true);
if (nbd->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
nbd->disk->queue);
if (nbd->flags & NBD_FLAG_SEND_FLUSH)
blk_queue_flush(nbd->disk->queue, REQ_FLUSH);
else
blk_queue_flush(nbd->disk->queue, 0);
nbd_parse_flags(nbd, bdev);
thread = kthread_run(nbd_thread_send, nbd, "%s",
nbd_name(nbd));
@ -762,29 +800,24 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
}
nbd_dev_dbg_init(nbd);
error = nbd_thread_recv(nbd);
error = nbd_thread_recv(nbd, bdev);
nbd_dev_dbg_close(nbd);
kthread_stop(thread);
mutex_lock(&nbd->tx_lock);
sock_shutdown(nbd);
sock = nbd->sock;
nbd->sock = NULL;
nbd_clear_que(nbd);
kill_bdev(bdev);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
set_device_ro(bdev, false);
if (sock)
sockfd_put(sock);
nbd->flags = 0;
nbd->bytesize = 0;
bdev->bd_inode->i_size = 0;
set_capacity(nbd->disk, 0);
if (max_part > 0)
blkdev_reread_part(bdev);
nbd_bdev_reset(bdev);
if (nbd->disconnect) /* user requested, ignore socket errors */
return 0;
error = 0;
if (nbd->timedout)
error = -ETIMEDOUT;
nbd_reset(nbd);
return error;
}
@ -892,50 +925,23 @@ static const struct file_operations nbd_dbg_flags_ops = {
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
struct dentry *dir;
struct dentry *f;
if (!nbd_dbg_dir)
return -EIO;
dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
if (IS_ERR_OR_NULL(dir)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s' (%ld)\n",
nbd_name(nbd), PTR_ERR(dir));
return PTR_ERR(dir);
if (!dir) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
nbd_name(nbd));
return -EIO;
}
nbd->dbg_dir = dir;
f = debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
if (IS_ERR_OR_NULL(f)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'tasks', %ld\n",
PTR_ERR(f));
return PTR_ERR(f);
}
f = debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
if (IS_ERR_OR_NULL(f)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'size_bytes', %ld\n",
PTR_ERR(f));
return PTR_ERR(f);
}
f = debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
if (IS_ERR_OR_NULL(f)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'timeout', %ld\n",
PTR_ERR(f));
return PTR_ERR(f);
}
f = debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
if (IS_ERR_OR_NULL(f)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'blocksize', %ld\n",
PTR_ERR(f));
return PTR_ERR(f);
}
f = debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
if (IS_ERR_OR_NULL(f)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'flags', %ld\n",
PTR_ERR(f));
return PTR_ERR(f);
}
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
return 0;
}
@ -950,8 +956,8 @@ static int nbd_dbg_init(void)
struct dentry *dbg_dir;
dbg_dir = debugfs_create_dir("nbd", NULL);
if (IS_ERR(dbg_dir))
return PTR_ERR(dbg_dir);
if (!dbg_dir)
return -EIO;
nbd_dbg_dir = dbg_dir;
@ -1069,7 +1075,7 @@ static int __init nbd_init(void)
nbd_dev[i].magic = NBD_MAGIC;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
spin_lock_init(&nbd_dev[i].tasks_lock);
spin_lock_init(&nbd_dev[i].sock_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
init_timer(&nbd_dev[i].timeout_timer);
@ -1077,14 +1083,12 @@ static int __init nbd_init(void)
nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
init_waitqueue_head(&nbd_dev[i].active_wq);
init_waitqueue_head(&nbd_dev[i].waiting_wq);
nbd_dev[i].blksize = 1024;
nbd_dev[i].bytesize = 0;
disk->major = NBD_MAJOR;
disk->first_minor = i << part_shift;
disk->fops = &nbd_fops;
disk->private_data = &nbd_dev[i];
sprintf(disk->disk_name, "nbd%d", i);
set_capacity(disk, 0);
nbd_reset(&nbd_dev[i]);
add_disk(disk);
}

View File

@ -23,8 +23,7 @@
#include <xen/grant_table.h>
#include "common.h"
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
/* On the XenBus the max length of 'ring-ref%u'. */
#define RINGREF_NAME_LEN (20)
struct backend_info {
@ -76,7 +75,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
else
devname = devpath;
snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname);
kfree(devpath);
return 0;
@ -85,7 +84,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
int err;
char name[BLKBACK_NAME_LEN];
char name[TASK_COMM_LEN];
struct xen_blkif_ring *ring;
int i;
@ -618,6 +617,14 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
goto fail;
}
err = xenbus_printf(XBT_NIL, dev->nodename,
"feature-max-indirect-segments", "%u",
MAX_INDIRECT_SEGMENTS);
if (err)
dev_warn(&dev->dev,
"writing %s/feature-max-indirect-segments (%d)",
dev->nodename, err);
/* Multi-queue: advertise how many queues are supported by us.*/
err = xenbus_printf(XBT_NIL, dev->nodename,
"multi-queue-max-queues", "%u", xenblk_max_queues);
@ -849,11 +856,6 @@ static void connect(struct backend_info *be)
dev->nodename);
goto abort;
}
err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
MAX_INDIRECT_SEGMENTS);
if (err)
dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
dev->nodename, err);
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));

View File

@ -125,8 +125,10 @@ static const struct block_device_operations xlvbd_block_fops;
*/
static unsigned int xen_blkif_max_segments = 32;
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
S_IRUGO);
MODULE_PARM_DESC(max_indirect_segments,
"Maximum amount of segments in indirect requests (default is 32)");
static unsigned int xen_blkif_max_queues = 4;
module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);

View File

@ -250,7 +250,7 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
return 0;
}
plane_cnt = (1 << dev->plane_mode);
plane_cnt = dev->plane_mode;
rqd->nr_pages = plane_cnt * nr_ppas;
if (dev->ops->max_phys_sect < rqd->nr_pages)
@ -463,11 +463,7 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun;
dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
dev->total_blocks = dev->nr_planes *
dev->blks_per_lun *
dev->luns_per_chnl *
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
dev->total_secs = dev->nr_luns * dev->sec_per_lun;
INIT_LIST_HEAD(&dev->online_targets);
mutex_init(&dev->mlock);
@ -872,20 +868,19 @@ static int nvm_configure_by_str_event(const char *val,
static int nvm_configure_get(char *buf, const struct kernel_param *kp)
{
int sz = 0;
char *buf_start = buf;
int sz;
struct nvm_dev *dev;
buf += sprintf(buf, "available devices:\n");
sz = sprintf(buf, "available devices:\n");
down_write(&nvm_lock);
list_for_each_entry(dev, &nvm_devices, devices) {
if (sz > 4095 - DISK_NAME_LEN)
if (sz > 4095 - DISK_NAME_LEN - 2)
break;
buf += sprintf(buf, " %32s\n", dev->name);
sz += sprintf(buf + sz, " %32s\n", dev->name);
}
up_write(&nvm_lock);
return buf - buf_start - 1;
return sz;
}
static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = {

View File

@ -100,14 +100,13 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
{
struct nvm_dev *dev = private;
struct gen_nvm *gn = dev->mp;
sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
u64 elba = slba + nlb;
struct gen_lun *lun;
struct nvm_block *blk;
u64 i;
int lun_id;
if (unlikely(elba > dev->total_pages)) {
if (unlikely(elba > dev->total_secs)) {
pr_err("gennvm: L2P data from device is out of bounds!\n");
return -EINVAL;
}
@ -115,7 +114,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
for (i = 0; i < nlb; i++) {
u64 pba = le64_to_cpu(entries[i]);
if (unlikely(pba >= max_pages && pba != U64_MAX)) {
if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) {
pr_err("gennvm: L2P data entry is out of bounds!\n");
return -EINVAL;
}
@ -197,7 +196,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
}
if (dev->ops->get_l2p_tbl) {
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
gennvm_block_map, dev);
if (ret) {
pr_err("gennvm: could not read L2P table.\n");

View File

@ -38,7 +38,7 @@ static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
spin_lock(&rblk->lock);
div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
rblk->nr_invalid_pages++;
@ -113,14 +113,24 @@ static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
{
return (rblk->next_page == rrpc->dev->pgs_per_blk);
return (rblk->next_page == rrpc->dev->sec_per_blk);
}
/* Calculate relative addr for the given block, considering instantiated LUNs */
static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
struct nvm_block *blk = rblk->parent;
int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
return lun_blk * rrpc->dev->sec_per_blk;
}
/* Calculate global addr for the given block */
static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
struct nvm_block *blk = rblk->parent;
return blk->id * rrpc->dev->pgs_per_blk;
return blk->id * rrpc->dev->sec_per_blk;
}
static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
@ -136,7 +146,7 @@ static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
l.g.sec = secs;
sector_div(ppa, dev->sec_per_pg);
div_u64_rem(ppa, dev->sec_per_blk, &pgs);
div_u64_rem(ppa, dev->pgs_per_blk, &pgs);
l.g.pg = pgs;
sector_div(ppa, dev->pgs_per_blk);
@ -191,12 +201,12 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
return NULL;
}
rblk = &rlun->blocks[blk->id];
rblk = rrpc_get_rblk(rlun, blk->id);
list_add_tail(&rblk->list, &rlun->open_list);
spin_unlock(&lun->lock);
blk->priv = rblk;
bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
bitmap_zero(rblk->invalid_pages, rrpc->dev->sec_per_blk);
rblk->next_page = 0;
rblk->nr_invalid_pages = 0;
atomic_set(&rblk->data_cmnt_size, 0);
@ -286,11 +296,11 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
struct bio *bio;
struct page *page;
int slot;
int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
int nr_sec_per_blk = rrpc->dev->sec_per_blk;
u64 phys_addr;
DECLARE_COMPLETION_ONSTACK(wait);
if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
return 0;
bio = bio_alloc(GFP_NOIO, 1);
@ -306,10 +316,10 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
}
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_pgs_per_blk)) < nr_pgs_per_blk) {
nr_sec_per_blk)) < nr_sec_per_blk) {
/* Lock laddr */
phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
phys_addr = rblk->parent->id * nr_sec_per_blk + slot;
try:
spin_lock(&rrpc->rev_lock);
@ -381,7 +391,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
mempool_free(page, rrpc->page_pool);
bio_put(bio);
if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) {
if (!bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) {
pr_err("nvm: failed to garbage collect block\n");
return -EIO;
}
@ -499,12 +509,21 @@ static void rrpc_gc_queue(struct work_struct *work)
struct rrpc *rrpc = gcb->rrpc;
struct rrpc_block *rblk = gcb->rblk;
struct nvm_lun *lun = rblk->parent->lun;
struct nvm_block *blk = rblk->parent;
struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
spin_lock(&rlun->lock);
list_add_tail(&rblk->prio, &rlun->prio_list);
spin_unlock(&rlun->lock);
spin_lock(&lun->lock);
lun->nr_open_blocks--;
lun->nr_closed_blocks++;
blk->state &= ~NVM_BLK_ST_OPEN;
blk->state |= NVM_BLK_ST_CLOSED;
list_move_tail(&rblk->list, &rlun->closed_list);
spin_unlock(&lun->lock);
mempool_free(gcb, rrpc->gcb_pool);
pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
rblk->parent->id);
@ -545,7 +564,7 @@ static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
struct rrpc_addr *gp;
struct rrpc_rev_addr *rev;
BUG_ON(laddr >= rrpc->nr_pages);
BUG_ON(laddr >= rrpc->nr_sects);
gp = &rrpc->trans_map[laddr];
spin_lock(&rrpc->rev_lock);
@ -668,20 +687,8 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
lun = rblk->parent->lun;
cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
struct nvm_block *blk = rblk->parent;
struct rrpc_lun *rlun = rblk->rlun;
spin_lock(&lun->lock);
lun->nr_open_blocks--;
lun->nr_closed_blocks++;
blk->state &= ~NVM_BLK_ST_OPEN;
blk->state |= NVM_BLK_ST_CLOSED;
list_move_tail(&rblk->list, &rlun->closed_list);
spin_unlock(&lun->lock);
if (unlikely(cmnt_size == rrpc->dev->sec_per_blk))
rrpc_run_gc(rrpc, rblk);
}
}
}
@ -726,7 +733,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
for (i = 0; i < npages; i++) {
/* We assume that mapping occurs at 4KB granularity */
BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages));
BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
gp = &rrpc->trans_map[laddr + i];
if (gp->rblk) {
@ -757,7 +764,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
return NVM_IO_REQUEUE;
BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages));
BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
gp = &rrpc->trans_map[laddr];
if (gp->rblk) {
@ -1007,21 +1014,21 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
struct nvm_dev *dev = rrpc->dev;
struct rrpc_addr *addr = rrpc->trans_map + slba;
struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
u64 elba = slba + nlb;
u64 i;
if (unlikely(elba > dev->total_pages)) {
if (unlikely(elba > dev->total_secs)) {
pr_err("nvm: L2P data from device is out of bounds!\n");
return -EINVAL;
}
for (i = 0; i < nlb; i++) {
u64 pba = le64_to_cpu(entries[i]);
unsigned int mod;
/* LNVM treats address-spaces as silos, LBA and PBA are
* equally large and zero-indexed.
*/
if (unlikely(pba >= max_pages && pba != U64_MAX)) {
if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) {
pr_err("nvm: L2P data entry is out of bounds!\n");
return -EINVAL;
}
@ -1033,8 +1040,10 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
if (!pba)
continue;
div_u64_rem(pba, rrpc->nr_sects, &mod);
addr[i].addr = pba;
raddr[pba].addr = slba + i;
raddr[mod].addr = slba + i;
}
return 0;
@ -1046,16 +1055,16 @@ static int rrpc_map_init(struct rrpc *rrpc)
sector_t i;
int ret;
rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages);
rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
if (!rrpc->trans_map)
return -ENOMEM;
rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
* rrpc->nr_pages);
* rrpc->nr_sects);
if (!rrpc->rev_trans_map)
return -ENOMEM;
for (i = 0; i < rrpc->nr_pages; i++) {
for (i = 0; i < rrpc->nr_sects; i++) {
struct rrpc_addr *p = &rrpc->trans_map[i];
struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
@ -1067,8 +1076,8 @@ static int rrpc_map_init(struct rrpc *rrpc)
return 0;
/* Bring up the mapping table from device */
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
rrpc_l2p_update, rrpc);
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update,
rrpc);
if (ret) {
pr_err("nvm: rrpc: could not read L2P table.\n");
return -EINVAL;
@ -1141,7 +1150,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
struct rrpc_lun *rlun;
int i, j;
if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
pr_err("rrpc: number of pages per block too high.");
return -EINVAL;
}
@ -1168,7 +1177,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
spin_lock_init(&rlun->lock);
rrpc->total_blocks += dev->blks_per_lun;
rrpc->nr_pages += dev->sec_per_lun;
rrpc->nr_sects += dev->sec_per_lun;
rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
rrpc->dev->blks_per_lun);
@ -1221,9 +1230,9 @@ static sector_t rrpc_capacity(void *private)
/* cur, gc, and two emergency blocks for each lun */
reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4;
provisioned = rrpc->nr_pages - reserved;
provisioned = rrpc->nr_sects - reserved;
if (reserved > rrpc->nr_pages) {
if (reserved > rrpc->nr_sects) {
pr_err("rrpc: not enough space available to expose storage.\n");
return 0;
}
@ -1242,10 +1251,11 @@ static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
struct nvm_dev *dev = rrpc->dev;
int offset;
struct rrpc_addr *laddr;
u64 paddr, pladdr;
u64 bpaddr, paddr, pladdr;
for (offset = 0; offset < dev->pgs_per_blk; offset++) {
paddr = block_to_addr(rrpc, rblk) + offset;
bpaddr = block_to_rel_addr(rrpc, rblk);
for (offset = 0; offset < dev->sec_per_blk; offset++) {
paddr = bpaddr + offset;
pladdr = rrpc->rev_trans_map[paddr].addr;
if (pladdr == ADDR_EMPTY)
@ -1386,7 +1396,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n",
rrpc->nr_luns, (unsigned long long)rrpc->nr_pages);
rrpc->nr_luns, (unsigned long long)rrpc->nr_sects);
mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));

View File

@ -104,7 +104,7 @@ struct rrpc {
struct rrpc_lun *luns;
/* calculated values */
unsigned long long nr_pages;
unsigned long long nr_sects;
unsigned long total_blocks;
/* Write strategy variables. Move these into each for structure for each
@ -156,6 +156,15 @@ struct rrpc_rev_addr {
u64 addr;
};
static inline struct rrpc_block *rrpc_get_rblk(struct rrpc_lun *rlun,
int blk_id)
{
struct rrpc *rrpc = rlun->rrpc;
int lun_blk = blk_id % rrpc->dev->blks_per_lun;
return &rlun->blocks[lun_blk];
}
static inline sector_t rrpc_get_laddr(struct bio *bio)
{
return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
@ -206,7 +215,7 @@ static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr,
unsigned pages,
struct rrpc_inflight_rq *r)
{
BUG_ON((laddr + pages) > rrpc->nr_pages);
BUG_ON((laddr + pages) > rrpc->nr_sects);
return __rrpc_lock_laddr(rrpc, laddr, pages, r);
}
@ -243,7 +252,7 @@ static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd)
struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
uint8_t pages = rqd->nr_pages;
BUG_ON((r->l_start + pages) > rrpc->nr_pages);
BUG_ON((r->l_start + pages) > rrpc->nr_sects);
rrpc_unlock_laddr(rrpc, r);
}

View File

@ -1015,8 +1015,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
*/
atomic_set(&dc->count, 1);
if (bch_cached_dev_writeback_start(dc))
/* Block writeback thread, but spawn it */
down_write(&dc->writeback_lock);
if (bch_cached_dev_writeback_start(dc)) {
up_write(&dc->writeback_lock);
return -ENOMEM;
}
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(dc);
@ -1028,6 +1032,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
bch_cached_dev_run(dc);
bcache_device_link(&dc->disk, c, "bdev");
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
pr_info("Caching %s as %s on set %pU",
bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
@ -1366,6 +1373,9 @@ static void cache_set_flush(struct closure *cl)
struct btree *b;
unsigned i;
if (!c)
closure_return(cl);
bch_cache_accounting_destroy(&c->accounting);
kobject_put(&c->internal);
@ -1828,11 +1838,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
return 0;
}
static void register_cache(struct cache_sb *sb, struct page *sb_page,
static int register_cache(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct cache *ca)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
const char *err = NULL;
int ret = 0;
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
@ -1847,27 +1858,35 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
if (blk_queue_discard(bdev_get_queue(ca->bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
if (cache_alloc(sb, ca) != 0)
ret = cache_alloc(sb, ca);
if (ret != 0)
goto err;
err = "error creating kobject";
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err;
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
err = "error calling kobject_add";
ret = -ENOMEM;
goto out;
}
mutex_lock(&bch_register_lock);
err = register_cache_set(ca);
mutex_unlock(&bch_register_lock);
if (err)
goto err;
if (err) {
ret = -ENODEV;
goto out;
}
pr_info("registered cache device %s", bdevname(bdev, name));
out:
kobject_put(&ca->kobj);
return;
err:
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
goto out;
if (err)
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
return ret;
}
/* Global interfaces/init */
@ -1965,7 +1984,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!ca)
goto err_close;
register_cache(sb, sb_page, bdev, ca);
if (register_cache(sb, sb_page, bdev, ca) != 0)
goto err_close;
}
out:
if (sb_page)

View File

@ -1,6 +1,10 @@
config NVME_CORE
tristate
config BLK_DEV_NVME
tristate "NVM Express block device"
depends on PCI && BLOCK
select NVME_CORE
---help---
The NVM Express driver is for solid state drives directly
connected to the PCI or PCI Express bus. If you know you
@ -11,7 +15,7 @@ config BLK_DEV_NVME
config BLK_DEV_NVME_SCSI
bool "SCSI emulation for NVMe device nodes"
depends on BLK_DEV_NVME
depends on NVME_CORE
---help---
This adds support for the SG_IO ioctl on the NVMe character
and block devices nodes, as well a a translation for a small

View File

@ -1,6 +1,8 @@
obj-$(CONFIG_NVME_CORE) += nvme-core.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
nvme-core-y := core.o
nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
lightnvm-$(CONFIG_NVM) := lightnvm.o
nvme-y += core.o pci.o $(lightnvm-y)
nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-y += pci.o

View File

@ -33,6 +33,20 @@
#define NVME_MINORS (1U << MINORBITS)
unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
EXPORT_SYMBOL_GPL(admin_timeout);
unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
static int nvme_major;
module_param(nvme_major, int, 0);
@ -40,7 +54,7 @@ static int nvme_char_major;
module_param(nvme_char_major, int, 0);
static LIST_HEAD(nvme_ctrl_list);
DEFINE_SPINLOCK(dev_list_lock);
static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
@ -72,11 +86,21 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
spin_lock(&dev_list_lock);
ns = disk->private_data;
if (ns && !kref_get_unless_zero(&ns->kref))
ns = NULL;
if (ns) {
if (!kref_get_unless_zero(&ns->kref))
goto fail;
if (!try_module_get(ns->ctrl->ops->module))
goto fail_put_ns;
}
spin_unlock(&dev_list_lock);
return ns;
fail_put_ns:
kref_put(&ns->kref, nvme_free_ns);
fail:
spin_unlock(&dev_list_lock);
return NULL;
}
void nvme_requeue_req(struct request *req)
@ -89,6 +113,7 @@ void nvme_requeue_req(struct request *req)
blk_mq_kick_requeue_list(req->q);
spin_unlock_irqrestore(req->q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(nvme_requeue_req);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags)
@ -108,17 +133,18 @@ struct request *nvme_alloc_request(struct request_queue *q,
req->cmd = (unsigned char *)cmd;
req->cmd_len = sizeof(struct nvme_command);
req->special = (void *)0;
return req;
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
* if the result is positive, it's an NVM Express status code
*/
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
struct nvme_completion *cqe, void *buffer, unsigned bufflen,
unsigned timeout)
{
struct request *req;
int ret;
@ -128,6 +154,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
return PTR_ERR(req);
req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
req->special = cqe;
if (buffer && bufflen) {
ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@ -136,8 +163,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
blk_execute_rq(req->q, NULL, req, 0);
if (result)
*result = (u32)(uintptr_t)req->special;
ret = req->errors;
out:
blk_mq_free_request(req);
@ -147,8 +172,9 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, unsigned bufflen)
{
return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0);
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
void __user *ubuffer, unsigned bufflen,
@ -156,6 +182,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
u32 *result, unsigned timeout)
{
bool write = cmd->common.opcode & 1;
struct nvme_completion cqe;
struct nvme_ns *ns = q->queuedata;
struct gendisk *disk = ns ? ns->disk : NULL;
struct request *req;
@ -168,6 +195,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
return PTR_ERR(req);
req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
req->special = &cqe;
if (ubuffer && bufflen) {
ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
@ -222,7 +250,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
blk_execute_rq(req->q, disk, req, 0);
ret = req->errors;
if (result)
*result = (u32)(uintptr_t)req->special;
*result = le32_to_cpu(cqe.result);
if (meta && !ret && !write) {
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
@ -303,6 +331,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result)
{
struct nvme_command c;
struct nvme_completion cqe;
int ret;
memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_get_features;
@ -310,13 +340,18 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
c.features.prp1 = cpu_to_le64(dma_addr);
c.features.fid = cpu_to_le32(fid);
return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
if (ret >= 0)
*result = le32_to_cpu(cqe.result);
return ret;
}
int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result)
{
struct nvme_command c;
struct nvme_completion cqe;
int ret;
memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_set_features;
@ -324,7 +359,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
c.features.fid = cpu_to_le32(fid);
c.features.dword11 = cpu_to_le32(dword11);
return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
if (ret >= 0)
*result = le32_to_cpu(cqe.result);
return ret;
}
int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
@ -364,6 +402,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
*count = min(*count, nr_io_queues);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
@ -504,7 +543,10 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
nvme_put_ns(disk->private_data);
struct nvme_ns *ns = disk->private_data;
module_put(ns->ctrl->ops->module);
nvme_put_ns(ns);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@ -545,8 +587,14 @@ static void nvme_init_integrity(struct nvme_ns *ns)
static void nvme_config_discard(struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u32 logical_block_size = queue_logical_block_size(ns->queue);
ns->queue->limits.discard_zeroes_data = 0;
if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
ns->queue->limits.discard_zeroes_data = 1;
else
ns->queue->limits.discard_zeroes_data = 0;
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
@ -566,8 +614,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return -ENODEV;
}
if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
__func__, ns->ctrl->instance, ns->ns_id);
dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
__func__);
return -ENODEV;
}
if (id->ncap == 0) {
@ -577,7 +625,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
if (nvme_nvm_register(ns->queue, disk->disk_name)) {
dev_warn(ns->ctrl->dev,
dev_warn(disk_to_dev(ns->disk),
"%s: LightNVM init failure\n", __func__);
kfree(id);
return -ENODEV;
@ -750,7 +798,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(ctrl->dev,
dev_err(ctrl->device,
"Device not ready; aborting %s\n", enabled ?
"initialisation" : "reset");
return -ENODEV;
@ -778,6 +826,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
return ret;
return nvme_wait_ready(ctrl, cap, false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
@ -790,7 +839,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
int ret;
if (page_shift < dev_page_min) {
dev_err(ctrl->dev,
dev_err(ctrl->device,
"Minimum device page size %u too large for host (%u)\n",
1 << dev_page_min, 1 << page_shift);
return -ENODEV;
@ -809,6 +858,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
return ret;
return nvme_wait_ready(ctrl, cap, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
@ -831,7 +881,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(ctrl->dev,
dev_err(ctrl->device,
"Device shutdown incomplete; abort shutdown\n");
return -ENODEV;
}
@ -839,6 +889,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
return ret;
}
EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
struct request_queue *q)
@ -870,13 +921,13 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
return ret;
}
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
if (ret) {
dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
return ret;
}
page_shift = NVME_CAP_MPSMIN(cap) + 12;
@ -886,13 +937,15 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = nvme_identify_ctrl(ctrl, &id);
if (ret) {
dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
return -EIO;
}
ctrl->vid = le16_to_cpu(id->vid);
ctrl->oncs = le16_to_cpup(&id->oncs);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
ctrl->cntlid = le16_to_cpup(&id->cntlid);
memcpy(ctrl->serial, id->sn, sizeof(id->sn));
memcpy(ctrl->model, id->mn, sizeof(id->mn));
memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
@ -919,6 +972,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
kfree(id);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_init_identify);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
@ -965,13 +1019,13 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
dev_warn(ctrl->dev,
dev_warn(ctrl->device,
"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
ret = -EINVAL;
goto out_unlock;
}
dev_warn(ctrl->dev,
dev_warn(ctrl->device,
"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
kref_get(&ns->kref);
mutex_unlock(&ctrl->namespaces_mutex);
@ -997,7 +1051,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->dev, "resetting controller\n");
dev_warn(ctrl->device, "resetting controller\n");
return ctrl->ops->reset_ctrl(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
@ -1028,6 +1082,30 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = dev_to_disk(dev)->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
int serial_len = sizeof(ctrl->serial);
int model_len = sizeof(ctrl->model);
if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
return sprintf(buf, "eui.%16phN\n", ns->uuid);
if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
return sprintf(buf, "eui.%8phN\n", ns->eui);
while (ctrl->serial[serial_len - 1] == ' ')
serial_len--;
while (ctrl->model[model_len - 1] == ' ')
model_len--;
return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id);
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@ -1053,6 +1131,7 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
@ -1081,7 +1160,7 @@ static const struct attribute_group nvme_ns_attr_group = {
.is_visible = nvme_attrs_are_visible,
};
#define nvme_show_function(field) \
#define nvme_show_str_function(field) \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
@ -1090,15 +1169,26 @@ static ssize_t field##_show(struct device *dev, \
} \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
nvme_show_function(model);
nvme_show_function(serial);
nvme_show_function(firmware_rev);
#define nvme_show_int_function(field) \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
return sprintf(buf, "%d\n", ctrl->field); \
} \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
nvme_show_str_function(model);
nvme_show_str_function(serial);
nvme_show_str_function(firmware_rev);
nvme_show_int_function(cntlid);
static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reset_controller.attr,
&dev_attr_model.attr,
&dev_attr_serial.attr,
&dev_attr_firmware_rev.attr,
&dev_attr_cntlid.attr,
NULL
};
@ -1308,6 +1398,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
mutex_unlock(&ctrl->namespaces_mutex);
kfree(id);
}
EXPORT_SYMBOL_GPL(nvme_scan_namespaces);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
@ -1316,6 +1407,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
nvme_ns_remove(ns);
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
static DEFINE_IDA(nvme_instance_ida);
@ -1347,13 +1439,14 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
}
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
{
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
spin_lock(&dev_list_lock);
list_del(&ctrl->node);
spin_unlock(&dev_list_lock);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
static void nvme_free_ctrl(struct kref *kref)
{
@ -1370,6 +1463,7 @@ void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
kref_put(&ctrl->kref, nvme_free_ctrl);
}
EXPORT_SYMBOL_GPL(nvme_put_ctrl);
/*
* Initialize a NVMe controller structures. This needs to be called during
@ -1394,14 +1488,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
MKDEV(nvme_char_major, ctrl->instance),
dev, nvme_dev_attr_groups,
ctrl, nvme_dev_attr_groups,
"nvme%d", ctrl->instance);
if (IS_ERR(ctrl->device)) {
ret = PTR_ERR(ctrl->device);
goto out_release_instance;
}
get_device(ctrl->device);
dev_set_drvdata(ctrl->device, ctrl);
ida_init(&ctrl->ns_ida);
spin_lock(&dev_list_lock);
@ -1414,6 +1507,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
out:
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
/**
* nvme_kill_queues(): Ends all namespace queues
@ -1446,6 +1540,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
}
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
@ -1462,6 +1557,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
}
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);
void nvme_start_queues(struct nvme_ctrl *ctrl)
{
@ -1475,6 +1571,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
int __init nvme_core_init(void)
{
@ -1514,3 +1611,8 @@ void nvme_core_exit(void)
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
module_init(nvme_core_init);
module_exit(nvme_core_exit);

View File

@ -379,8 +379,31 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
return ret;
}
static void nvme_nvm_bb_tbl_fold(struct nvm_dev *nvmdev,
int nr_dst_blks, u8 *dst_blks,
int nr_src_blks, u8 *src_blks)
{
int blk, offset, pl, blktype;
for (blk = 0; blk < nr_dst_blks; blk++) {
offset = blk * nvmdev->plane_mode;
blktype = src_blks[offset];
/* Bad blocks on any planes take precedence over other types */
for (pl = 0; pl < nvmdev->plane_mode; pl++) {
if (src_blks[offset + pl] &
(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
blktype = src_blks[offset + pl];
break;
}
}
dst_blks[blk] = blktype;
}
}
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
int nr_blocks, nvm_bb_update_fn *update_bbtbl,
int nr_dst_blks, nvm_bb_update_fn *update_bbtbl,
void *priv)
{
struct request_queue *q = nvmdev->q;
@ -388,7 +411,9 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
u8 *dst_blks = NULL;
int nr_src_blks = nr_dst_blks * nvmdev->plane_mode;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_src_blks;
int ret = 0;
c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
@ -399,6 +424,12 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
if (!bb_tbl)
return -ENOMEM;
dst_blks = kzalloc(nr_dst_blks, GFP_KERNEL);
if (!dst_blks) {
ret = -ENOMEM;
goto out;
}
ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
bb_tbl, tblsz);
if (ret) {
@ -420,16 +451,21 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
goto out;
}
if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
if (le32_to_cpu(bb_tbl->tblks) != nr_src_blks) {
ret = -EINVAL;
dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)",
le32_to_cpu(bb_tbl->tblks), nr_blocks);
le32_to_cpu(bb_tbl->tblks), nr_src_blks);
goto out;
}
nvme_nvm_bb_tbl_fold(nvmdev, nr_dst_blks, dst_blks,
nr_src_blks, bb_tbl->blk);
ppa = dev_to_generic_addr(nvmdev, ppa);
ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
ret = update_bbtbl(ppa, nr_dst_blks, dst_blks, priv);
out:
kfree(dst_blks);
kfree(bb_tbl);
return ret;
}

View File

@ -59,6 +59,12 @@ enum nvme_quirks {
* correctly.
*/
NVME_QUIRK_IDENTIFY_CNS = (1 << 1),
/*
* The controller deterministically returns O's on reads to discarded
* logical blocks.
*/
NVME_QUIRK_DISCARD_ZEROES = (1 << 2),
};
struct nvme_ctrl {
@ -78,6 +84,7 @@ struct nvme_ctrl {
char serial[20];
char model[40];
char firmware_rev[8];
int cntlid;
u32 ctrl_config;
@ -85,6 +92,7 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 stripe_size;
u16 oncs;
u16 vid;
atomic_t abort_limit;
u8 event_limit;
u8 vwc;
@ -124,6 +132,7 @@ struct nvme_ns {
};
struct nvme_ctrl_ops {
struct module *module;
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@ -255,7 +264,8 @@ void nvme_requeue_req(struct request *req);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, unsigned bufflen, u32 *result, unsigned timeout);
struct nvme_completion *cqe, void *buffer, unsigned bufflen,
unsigned timeout);
int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
void __user *ubuffer, unsigned bufflen, u32 *result,
unsigned timeout);
@ -273,8 +283,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
extern spinlock_t dev_list_lock;
struct sg_io_hdr;
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);

View File

@ -27,7 +27,6 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
@ -39,6 +38,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
@ -57,18 +57,6 @@
#define NVME_NR_AEN_COMMANDS 1
#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@ -76,10 +64,7 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
struct nvme_dev;
struct nvme_queue;
@ -92,7 +77,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
struct list_head node;
struct nvme_queue **queues;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
@ -110,6 +94,8 @@ struct nvme_dev {
struct work_struct reset_work;
struct work_struct scan_work;
struct work_struct remove_work;
struct work_struct async_work;
struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
@ -148,7 +134,6 @@ struct nvme_queue {
u32 __iomem *q_db;
u16 q_depth;
s16 cq_vector;
u16 sq_head;
u16 sq_tail;
u16 cq_head;
u16 qid;
@ -303,17 +288,20 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
u16 status = le16_to_cpu(cqe->status) >> 1;
u32 result = le32_to_cpu(cqe->result);
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
++dev->ctrl.event_limit;
queue_work(nvme_workq, &dev->async_work);
}
if (status != NVME_SC_SUCCESS)
return;
switch (result & 0xff07) {
case NVME_AER_NOTICE_NS_CHANGED:
dev_info(dev->dev, "rescanning\n");
dev_info(dev->ctrl.device, "rescanning\n");
nvme_queue_scan(dev);
default:
dev_warn(dev->dev, "async event result %08x\n", result);
dev_warn(dev->ctrl.device, "async event result %08x\n", result);
}
}
@ -727,7 +715,7 @@ static void nvme_complete_rq(struct request *req)
}
if (unlikely(iod->aborted)) {
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"completing aborted command with status: %04x\n",
req->errors);
}
@ -749,7 +737,6 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
if ((status & 1) != phase)
break;
nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
if (++head == nvmeq->q_depth) {
head = 0;
phase = !phase;
@ -759,7 +746,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
*tag = -1;
if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
dev_warn(nvmeq->q_dmadev,
dev_warn(nvmeq->dev->ctrl.device,
"invalid id %d completed on queue %d\n",
cqe.command_id, le16_to_cpu(cqe.sq_id));
continue;
@ -778,10 +765,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
}
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
u32 result = le32_to_cpu(cqe.result);
req->special = (void *)(uintptr_t)result;
}
if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
memcpy(req->special, &cqe, sizeof(cqe));
blk_mq_complete_request(req, status >> 1);
}
@ -846,15 +831,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
return 0;
}
static void nvme_submit_async_event(struct nvme_dev *dev)
static void nvme_async_event_work(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
struct nvme_queue *nvmeq = dev->queues[0];
struct nvme_command c;
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
__nvme_submit_cmd(dev->queues[0], &c);
spin_lock_irq(&nvmeq->q_lock);
while (dev->ctrl.event_limit > 0) {
c.common.command_id = NVME_AQ_BLKMQ_DEPTH +
--dev->ctrl.event_limit;
__nvme_submit_cmd(nvmeq, &c);
}
spin_unlock_irq(&nvmeq->q_lock);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@ -924,12 +916,10 @@ static void abort_endio(struct request *req, int error)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
u32 result = (u32)(uintptr_t)req->special;
u16 status = req->errors;
dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
atomic_inc(&nvmeq->dev->ctrl.abort_limit);
blk_mq_free_request(req);
}
@ -948,7 +938,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* shutdown, so we return BLK_EH_HANDLED.
*/
if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
@ -962,7 +952,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* returned to the driver, or if this is the admin queue.
*/
if (!nvmeq->qid || iod->aborted) {
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
@ -988,8 +978,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
cmd.abort.cid = req->tag;
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
req->tag, nvmeq->qid);
dev_warn(nvmeq->dev->ctrl.device,
"I/O %d QID %d timeout, aborting\n",
req->tag, nvmeq->qid);
abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
BLK_MQ_REQ_NOWAIT);
@ -1018,7 +1009,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
if (!blk_mq_request_started(req))
return;
dev_dbg_ratelimited(nvmeq->q_dmadev,
dev_dbg_ratelimited(nvmeq->dev->ctrl.device,
"Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
status = NVME_SC_ABORT_REQ;
@ -1173,9 +1164,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
/* make sure queue descriptor is set before queue count, for kthread */
mb();
dev->queue_count++;
return nvmeq;
@ -1360,53 +1348,31 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
static int nvme_kthread(void *data)
static void nvme_watchdog_timer(unsigned long data)
{
struct nvme_dev *dev, *next;
struct nvme_dev *dev = (struct nvme_dev *)data;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
/*
* Skip controllers currently under reset.
*/
if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
continue;
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
if (queue_work(nvme_workq, &dev->reset_work)) {
dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
readl(dev->bar + NVME_REG_CSTS));
}
continue;
}
for (i = 0; i < dev->queue_count; i++) {
struct nvme_queue *nvmeq = dev->queues[i];
if (!nvmeq)
continue;
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
while (i == 0 && dev->ctrl.event_limit > 0)
nvme_submit_async_event(dev);
spin_unlock_irq(&nvmeq->q_lock);
}
/*
* Skip controllers currently under reset.
*/
if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
((csts & NVME_CSTS_CFS) ||
(dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
if (queue_work(nvme_workq, &dev->reset_work)) {
dev_warn(dev->dev,
"Failed status: 0x%x, reset controller.\n",
csts);
}
spin_unlock(&dev_list_lock);
schedule_timeout(round_jiffies_relative(HZ));
return;
}
return 0;
mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
}
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
unsigned i, max;
int ret = 0;
for (i = dev->queue_count; i <= dev->max_qid; i++) {
@ -1416,7 +1382,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
}
}
for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
max = min(dev->max_qid, dev->queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret) {
nvme_free_queues(dev, i);
@ -1507,7 +1474,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
* access to the admin queue, as that might be only way to fix them up.
*/
if (result > 0) {
dev_err(dev->dev, "Could not set queue count (%d)\n", result);
dev_err(dev->ctrl.device,
"Could not set queue count (%d)\n", result);
nr_io_queues = 0;
result = 0;
}
@ -1573,9 +1541,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
adminq->cq_vector = -1;
goto free_queues;
}
/* Free previously allocated queues that are no longer usable */
nvme_free_queues(dev, nr_io_queues + 1);
return nvme_create_io_queues(dev);
free_queues:
@ -1709,7 +1674,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
dev->ctrl.tagset = &dev->tagset;
} else {
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
/* Free previously allocated queues that are no longer usable */
nvme_free_queues(dev, dev->online_queues);
}
nvme_queue_scan(dev);
return 0;
}
@ -1796,56 +1767,12 @@ static void nvme_pci_disable(struct nvme_dev *dev)
}
}
static int nvme_dev_list_add(struct nvme_dev *dev)
{
bool start_thread = false;
spin_lock(&dev_list_lock);
if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
start_thread = true;
nvme_thread = NULL;
}
list_add(&dev->node, &dev_list);
spin_unlock(&dev_list_lock);
if (start_thread) {
nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
wake_up_all(&nvme_kthread_wait);
} else
wait_event_killable(nvme_kthread_wait, nvme_thread);
if (IS_ERR_OR_NULL(nvme_thread))
return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
return 0;
}
/*
* Remove the node from the device list and check
* for whether or not we need to stop the nvme_thread.
*/
static void nvme_dev_list_remove(struct nvme_dev *dev)
{
struct task_struct *tmp = NULL;
spin_lock(&dev_list_lock);
list_del_init(&dev->node);
if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) {
tmp = nvme_thread;
nvme_thread = NULL;
}
spin_unlock(&dev_list_lock);
if (tmp)
kthread_stop(tmp);
}
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i;
u32 csts = -1;
nvme_dev_list_remove(dev);
del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(to_pci_dev(dev->dev))) {
@ -1907,7 +1834,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
{
dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
kref_get(&dev->ctrl.kref);
nvme_dev_disable(dev, false);
@ -1954,17 +1881,16 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
queue_work(nvme_workq, &dev->async_work);
result = nvme_dev_list_add(dev);
if (result)
goto out;
mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
*/
if (dev->online_queues < 2) {
dev_warn(dev->dev, "IO queues not created\n");
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
@ -2032,6 +1958,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
}
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.module = THIS_MODULE,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
@ -2089,10 +2016,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto free;
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
INIT_WORK(&dev->async_work, nvme_async_event_work);
setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
(unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);
@ -2105,6 +2034,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release_pools;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
queue_work(nvme_workq, &dev->reset_work);
return 0;
@ -2145,8 +2076,11 @@ static void nvme_remove(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
del_timer_sync(&dev->watchdog_timer);
set_bit(NVME_CTRL_REMOVING, &dev->flags);
pci_set_drvdata(pdev, NULL);
flush_work(&dev->async_work);
flush_work(&dev->scan_work);
nvme_remove_namespaces(&dev->ctrl);
nvme_uninit_ctrl(&dev->ctrl);
@ -2192,7 +2126,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
* shutdown the controller to quiesce. The controller will be restarted
* after the slot reset through driver's slot_reset callback.
*/
dev_warn(&pdev->dev, "error detected: state:%d\n", state);
dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
switch (state) {
case pci_channel_io_normal:
return PCI_ERS_RESULT_CAN_RECOVER;
@ -2209,7 +2143,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
dev_info(&pdev->dev, "restart after slot reset\n");
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
queue_work(nvme_workq, &dev->reset_work);
return PCI_ERS_RESULT_RECOVERED;
@ -2232,7 +2166,8 @@ static const struct pci_error_handlers nvme_err_handler = {
static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0953),
.driver_data = NVME_QUIRK_STRIPE_SIZE, },
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DISCARD_ZEROES, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
@ -2257,34 +2192,20 @@ static int __init nvme_init(void)
{
int result;
init_waitqueue_head(&nvme_kthread_wait);
nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!nvme_workq)
return -ENOMEM;
result = nvme_core_init();
if (result < 0)
goto kill_workq;
result = pci_register_driver(&nvme_driver);
if (result)
goto core_exit;
return 0;
core_exit:
nvme_core_exit();
kill_workq:
destroy_workqueue(nvme_workq);
destroy_workqueue(nvme_workq);
return result;
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
nvme_core_exit();
destroy_workqueue(nvme_workq);
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size();
}

View File

@ -92,9 +92,9 @@ enum {
NVM_ADDRMODE_CHANNEL = 1,
/* Plane programming mode for LUN */
NVM_PLANE_SINGLE = 0,
NVM_PLANE_DOUBLE = 1,
NVM_PLANE_QUAD = 2,
NVM_PLANE_SINGLE = 1,
NVM_PLANE_DOUBLE = 2,
NVM_PLANE_QUAD = 4,
/* Status codes */
NVM_RSP_SUCCESS = 0x0,
@ -341,8 +341,8 @@ struct nvm_dev {
int lps_per_blk;
int *lptbl;
unsigned long total_pages;
unsigned long total_blocks;
unsigned long total_secs;
int nr_luns;
unsigned max_pages_per_blk;