John Garry bf4ae8f2e6 scsi: sd: Atomic write support
Support is divided into two main areas:
- reading VPD pages and setting sdev request_queue limits
- support WRITE ATOMIC (16) command and tracing

The relevant block limits VPD page need to be read to allow the block layer
request_queue atomic write limits to be set. These VPD page limits are
described in sbc4r22 section 6.6.4 - Block limits VPD page.

There are five limits of interest:
- MAXIMUM ATOMIC TRANSFER LENGTH
- ATOMIC ALIGNMENT
- ATOMIC TRANSFER LENGTH GRANULARITY
- MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY
- MAXIMUM ATOMIC BOUNDARY SIZE

MAXIMUM ATOMIC TRANSFER LENGTH is the maximum length for a WRITE ATOMIC
(16) command. It will not be greater than the device MAXIMUM TRANSFER
LENGTH.

ATOMIC ALIGNMENT and ATOMIC TRANSFER LENGTH GRANULARITY are the minimum
alignment and length values for an atomic write in terms of logical blocks.

Unlike NVMe, SCSI does not specify an LBA space boundary, but does specify
a per-IO boundary granularity. The maximum boundary size is specified in
MAXIMUM ATOMIC BOUNDARY SIZE. When used, this boundary value is set in the
WRITE ATOMIC (16) ATOMIC BOUNDARY field - layout for the WRITE_ATOMIC_16
command can be found in sbc4r22 section 5.48. This boundary value is the
granularity size at which the device may atomically write the data. A value
of zero in WRITE ATOMIC (16) ATOMIC BOUNDARY field means that all data must
be atomically written together.

MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY is the maximum atomic write
length if a non-zero boundary value is set.

For atomic write support, the WRITE ATOMIC (16) boundary is not of much
interest, as the block layer expects each request submitted to be executed
atomically. However, the SCSI spec does leave itself open to a quirky
scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero, yet MAXIMUM ATOMIC
TRANSFER LENGTH WITH BOUNDARY and MAXIMUM ATOMIC BOUNDARY SIZE are both
non-zero. This case will be supported.

To set the block layer request_queue atomic write capabilities, sanitize
the VPD page limits and set limits as follows:
- atomic_write_unit_min is derived from granularity and alignment values.
  If no granularity value is not set, use physical block size
- atomic_write_unit_max is derived from MAXIMUM ATOMIC TRANSFER LENGTH. In
  the scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero and boundary
  limits are non-zero, use MAXIMUM ATOMIC BOUNDARY SIZE for
  atomic_write_unit_max. New flag scsi_disk.use_atomic_write_boundary is
  set for this scenario.
- atomic_write_boundary_bytes is set to zero always

SCSI also supports a WRITE ATOMIC (32) command, which is for type 2
protection enabled. This is not going to be supported now, so check for
T10_PI_TYPE2_PROTECTION when setting any request_queue limits.

To handle an atomic write request, add support for WRITE ATOMIC (16)
command in handler sd_setup_atomic_cmnd(). Flag use_atomic_write_boundary
is checked here for encoding ATOMIC BOUNDARY field.

Trace info is also added for WRITE_ATOMIC_16 command.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-9-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-06-20 15:19:17 -06:00

279 lines
7.3 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _SCSI_DISK_H
#define _SCSI_DISK_H
/*
* More than enough for everybody ;) The huge number of majors
* is a leftover from 16bit dev_t days, we don't really need that
* much numberspace.
*/
#define SD_MAJORS 16
/*
* Time out in seconds for disks and Magneto-opticals (which are slower).
*/
#define SD_TIMEOUT (30 * HZ)
#define SD_MOD_TIMEOUT (75 * HZ)
/*
* Flush timeout is a multiplier over the standard device timeout which is
* user modifiable via sysfs but initially set to SD_TIMEOUT
*/
#define SD_FLUSH_TIMEOUT_MULTIPLIER 2
#define SD_WRITE_SAME_TIMEOUT (120 * HZ)
/*
* Number of allowed retries
*/
#define SD_MAX_RETRIES 5
#define SD_PASSTHROUGH_RETRIES 1
#define SD_MAX_MEDIUM_TIMEOUTS 2
/*
* Size of the initial data buffer for mode and read capacity data
*/
#define SD_BUF_SIZE 512
/*
* Number of sectors at the end of the device to avoid multi-sector
* accesses to in the case of last_sector_bug
*/
#define SD_LAST_BUGGY_SECTORS 8
enum {
SD_EXT_CDB_SIZE = 32, /* Extended CDB size */
SD_MEMPOOL_SIZE = 2, /* CDB pool size */
};
enum {
SD_DEF_XFER_BLOCKS = 0xffff,
SD_MAX_XFER_BLOCKS = 0xffffffff,
SD_MAX_WS10_BLOCKS = 0xffff,
SD_MAX_WS16_BLOCKS = 0x7fffff,
};
enum {
SD_LBP_FULL = 0, /* Full logical block provisioning */
SD_LBP_UNMAP, /* Use UNMAP command */
SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */
SD_LBP_WS10, /* Use WRITE SAME(10) with UNMAP bit */
SD_LBP_ZERO, /* Use WRITE SAME(10) with zero payload */
SD_LBP_DISABLE, /* Discard disabled due to failed cmd */
};
enum {
SD_ZERO_WRITE = 0, /* Use WRITE(10/16) command */
SD_ZERO_WS, /* Use WRITE SAME(10/16) command */
SD_ZERO_WS16_UNMAP, /* Use WRITE SAME(16) with UNMAP */
SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */
};
/**
* struct zoned_disk_info - Specific properties of a ZBC SCSI device.
* @nr_zones: number of zones.
* @zone_blocks: number of logical blocks per zone.
*
* This data structure holds the ZBC SCSI device properties that are retrieved
* twice: a first time before the gendisk capacity is known and a second time
* after the gendisk capacity is known.
*/
struct zoned_disk_info {
u32 nr_zones;
u32 zone_blocks;
};
struct scsi_disk {
struct scsi_device *device;
/*
* disk_dev is used to show attributes in /sys/class/scsi_disk/,
* but otherwise not really needed. Do not use for refcounting.
*/
struct device disk_dev;
struct gendisk *disk;
struct opal_dev *opal_dev;
#ifdef CONFIG_BLK_DEV_ZONED
/* Updated during revalidation before the gendisk capacity is known. */
struct zoned_disk_info early_zone_info;
/* Updated during revalidation after the gendisk capacity is known. */
struct zoned_disk_info zone_info;
u32 zones_optimal_open;
u32 zones_optimal_nonseq;
u32 zones_max_open;
/*
* Either zero or a power of two. If not zero it means that the offset
* between zone starting LBAs is constant.
*/
u32 zone_starting_lba_gran;
#endif
atomic_t openers;
sector_t capacity; /* size in logical blocks */
int max_retries;
u32 min_xfer_blocks;
u32 max_xfer_blocks;
u32 opt_xfer_blocks;
u32 max_ws_blocks;
u32 max_unmap_blocks;
u32 unmap_granularity;
u32 unmap_alignment;
u32 max_atomic;
u32 atomic_alignment;
u32 atomic_granularity;
u32 max_atomic_with_boundary;
u32 max_atomic_boundary;
u32 index;
unsigned int physical_block_size;
unsigned int max_medium_access_timeouts;
unsigned int medium_access_timed_out;
/* number of permanent streams */
u16 permanent_stream_count;
u8 media_present;
u8 write_prot;
u8 protection_type;/* Data Integrity Field */
u8 provisioning_mode;
u8 zeroing_mode;
u8 nr_actuators; /* Number of actuators */
bool suspended; /* Disk is suspended (stopped) */
unsigned ATO : 1; /* state of disk ATO bit */
unsigned cache_override : 1; /* temp override of WCE,RCD */
unsigned WCE : 1; /* state of disk WCE bit */
unsigned RCD : 1; /* state of disk RCD bit, unused */
unsigned DPOFUA : 1; /* state of disk DPOFUA bit */
unsigned first_scan : 1;
unsigned lbpme : 1;
unsigned lbprz : 1;
unsigned lbpu : 1;
unsigned lbpws : 1;
unsigned lbpws10 : 1;
unsigned lbpvpd : 1;
unsigned ws10 : 1;
unsigned ws16 : 1;
unsigned rc_basis: 2;
unsigned zoned: 2;
unsigned urswrz : 1;
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
unsigned rscs : 1; /* reduced stream control support */
unsigned use_atomic_write_boundary : 1;
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
{
return disk->private_data;
}
#define sd_printk(prefix, sdsk, fmt, a...) \
(sdsk)->disk ? \
sdev_prefix_printk(prefix, (sdsk)->device, \
(sdsk)->disk->disk_name, fmt, ##a) : \
sdev_printk(prefix, (sdsk)->device, fmt, ##a)
#define sd_first_printk(prefix, sdsk, fmt, a...) \
do { \
if ((sdsk)->first_scan) \
sd_printk(prefix, sdsk, fmt, ##a); \
} while (0)
static inline int scsi_medium_access_command(struct scsi_cmnd *scmd)
{
switch (scmd->cmnd[0]) {
case READ_6:
case READ_10:
case READ_12:
case READ_16:
case SYNCHRONIZE_CACHE:
case VERIFY:
case VERIFY_12:
case VERIFY_16:
case WRITE_6:
case WRITE_10:
case WRITE_12:
case WRITE_16:
case WRITE_SAME:
case WRITE_SAME_16:
case UNMAP:
return 1;
case VARIABLE_LENGTH_CMD:
switch (scmd->cmnd[9]) {
case READ_32:
case VERIFY_32:
case WRITE_32:
case WRITE_SAME_32:
return 1;
}
}
return 0;
}
static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blocks)
{
return blocks << (ilog2(sdev->sector_size) - 9);
}
static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks)
{
return blocks * sdev->sector_size;
}
static inline sector_t bytes_to_logical(struct scsi_device *sdev, unsigned int bytes)
{
return bytes >> ilog2(sdev->sector_size);
}
static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sector)
{
return sector >> (ilog2(sdev->sector_size) - 9);
}
void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED
int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
u8 buf[SD_BUF_SIZE]);
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op, bool all);
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr);
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
struct queue_limits *lim, u8 buf[SD_BUF_SIZE])
{
return 0;
}
static inline int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
{
return 0;
}
static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op,
bool all)
{
return BLK_STS_TARGET;
}
static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
unsigned int good_bytes, struct scsi_sense_hdr *sshdr)
{
return good_bytes;
}
#define sd_zbc_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr);
void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result);
#endif /* _SCSI_DISK_H */