mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
block: add an API for Persistent Reservations
This commit adds a driver API and ioctls for controlling Persistent Reservations generically at the block layer. Persistent Reservations are supported by SCSI and NVMe and allow controlling who gets access to a device in a shared storage setup. Note that we add a pr_ops structure to struct block_device_operations instead of adding the members directly to avoid bloating all instances of devices that will never support Persistent Reservations. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
d8e4bb8103
commit
bbd3e06436
119
Documentation/block/pr.txt
Normal file
119
Documentation/block/pr.txt
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
|
||||||
|
Block layer support for Persistent Reservations
|
||||||
|
===============================================
|
||||||
|
|
||||||
|
The Linux kernel supports a user space interface for simplified
|
||||||
|
Persistent Reservations which map to block devices that support
|
||||||
|
these (like SCSI). Persistent Reservations allow restricting
|
||||||
|
access to block devices to specific initiators in a shared storage
|
||||||
|
setup.
|
||||||
|
|
||||||
|
This document gives a general overview of the supported ioctl commands.
|
||||||
|
For a more detailed reference please refer to the SCSI Primary
|
||||||
|
Commands standard, specifically the section on Reservations and the
|
||||||
|
"PERSISTENT RESERVE IN" and "PERSISTENT RESERVE OUT" commands.
|
||||||
|
|
||||||
|
All implementations are expected to ensure the reservations survive
|
||||||
|
a power loss and cover all connections in a multi path environment.
|
||||||
|
These behaviors are optional in SPC but will be automatically applied
|
||||||
|
by Linux.
|
||||||
|
|
||||||
|
|
||||||
|
The following types of reservations are supported:
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
- PR_WRITE_EXCLUSIVE
|
||||||
|
|
||||||
|
Only the initiator that owns the reservation can write to the
|
||||||
|
device. Any initiator can read from the device.
|
||||||
|
|
||||||
|
- PR_EXCLUSIVE_ACCESS
|
||||||
|
|
||||||
|
Only the initiator that owns the reservation can access the
|
||||||
|
device.
|
||||||
|
|
||||||
|
- PR_WRITE_EXCLUSIVE_REG_ONLY
|
||||||
|
|
||||||
|
Only initiators with a registered key can write to the device.
|
||||||
|
Any initiator can read from the device.
|
||||||
|
|
||||||
|
- PR_EXCLUSIVE_ACCESS_REG_ONLY
|
||||||
|
|
||||||
|
Only initiators with a registered key can access the device.
|
||||||
|
|
||||||
|
- PR_WRITE_EXCLUSIVE_ALL_REGS
|
||||||
|
|
||||||
|
Only initiators with a registered key can write to the device.
|
||||||
|
Any initiator can read from the device.
|
||||||
|
All initiators with a registered key are considered reservation
|
||||||
|
holders.
|
||||||
|
Please reference the SPC spec on the meaning of a reservation
|
||||||
|
holder if you want to use this type.
|
||||||
|
|
||||||
|
- PR_EXCLUSIVE_ACCESS_ALL_REGS
|
||||||
|
|
||||||
|
Only initiators with a registered key can access the device.
|
||||||
|
All initiators with a registered key are considered reservation
|
||||||
|
holders.
|
||||||
|
Please reference the SPC spec on the meaning of a reservation
|
||||||
|
holder if you want to use this type.
|
||||||
|
|
||||||
|
|
||||||
|
The following ioctls are supported:
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
|
1. IOC_PR_REGISTER
|
||||||
|
|
||||||
|
This ioctl command registers a new reservation if the new_key argument
|
||||||
|
is non-zero. If no existing reservation exists, old_key must be zero;
|
||||||
|
if an existing reservation should be replaced old_key must contain
|
||||||
|
the old reservation key.
|
||||||
|
|
||||||
|
If the new_key argument is 0 it unregisters the existing reservation passed
|
||||||
|
in old_key.
|
||||||
|
|
||||||
|
|
||||||
|
2. IOC_PR_RESERVE
|
||||||
|
|
||||||
|
This ioctl command reserves the device and thus restricts access for other
|
||||||
|
devices based on the type argument. The key argument must be the existing
|
||||||
|
reservation key for the device as acquired by the IOC_PR_REGISTER,
|
||||||
|
IOC_PR_REGISTER_IGNORE, IOC_PR_PREEMPT or IOC_PR_PREEMPT_ABORT commands.
|
||||||
|
|
||||||
|
|
||||||
|
3. IOC_PR_RELEASE
|
||||||
|
|
||||||
|
This ioctl command releases the reservation specified by key and flags
|
||||||
|
and thus removes any access restriction implied by it.
|
||||||
|
|
||||||
|
|
||||||
|
4. IOC_PR_PREEMPT
|
||||||
|
|
||||||
|
This ioctl command releases the existing reservation referred to by
|
||||||
|
old_key and replaces it with a new reservation of the given type for the
|
||||||
|
reservation key new_key.
|
||||||
|
|
||||||
|
|
||||||
|
5. IOC_PR_PREEMPT_ABORT
|
||||||
|
|
||||||
|
This ioctl command works like IOC_PR_PREEMPT except that it also aborts
|
||||||
|
any outstanding command sent over a connection identified by old_key.
|
||||||
|
|
||||||
|
6. IOC_PR_CLEAR
|
||||||
|
|
||||||
|
This ioctl command unregisters both key and any other reservation key
|
||||||
|
registered with the device and drops any existing reservation.
|
||||||
|
|
||||||
|
|
||||||
|
Flags
|
||||||
|
-----
|
||||||
|
|
||||||
|
All the ioctls have a flags field. Currently only one flag is supported:
|
||||||
|
|
||||||
|
- PR_FL_IGNORE_KEY
|
||||||
|
|
||||||
|
Ignore the existing reservation key. This is commonly supported for
|
||||||
|
IOC_PR_REGISTER, and some implementations may support the flag for
|
||||||
|
IOC_PR_RESERVE.
|
||||||
|
|
||||||
|
For all unknown flags the kernel will return -EOPNOTSUPP.
|
103
block/ioctl.c
103
block/ioctl.c
@ -7,6 +7,7 @@
|
|||||||
#include <linux/backing-dev.h>
|
#include <linux/backing-dev.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/blktrace_api.h>
|
#include <linux/blktrace_api.h>
|
||||||
|
#include <linux/pr.h>
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
|
|
||||||
static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
|
static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
|
||||||
@ -295,6 +296,96 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
|
|||||||
*/
|
*/
|
||||||
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
|
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
|
||||||
|
|
||||||
|
static int blkdev_pr_register(struct block_device *bdev,
|
||||||
|
struct pr_registration __user *arg)
|
||||||
|
{
|
||||||
|
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
|
||||||
|
struct pr_registration reg;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
if (!ops || !ops->pr_register)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
if (copy_from_user(®, arg, sizeof(reg)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (reg.flags & ~PR_FL_IGNORE_KEY)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int blkdev_pr_reserve(struct block_device *bdev,
|
||||||
|
struct pr_reservation __user *arg)
|
||||||
|
{
|
||||||
|
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
|
||||||
|
struct pr_reservation rsv;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
if (!ops || !ops->pr_reserve)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
if (copy_from_user(&rsv, arg, sizeof(rsv)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (rsv.flags & ~PR_FL_IGNORE_KEY)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int blkdev_pr_release(struct block_device *bdev,
|
||||||
|
struct pr_reservation __user *arg)
|
||||||
|
{
|
||||||
|
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
|
||||||
|
struct pr_reservation rsv;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
if (!ops || !ops->pr_release)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
if (copy_from_user(&rsv, arg, sizeof(rsv)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (rsv.flags)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return ops->pr_release(bdev, rsv.key, rsv.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int blkdev_pr_preempt(struct block_device *bdev,
|
||||||
|
struct pr_preempt __user *arg, bool abort)
|
||||||
|
{
|
||||||
|
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
|
||||||
|
struct pr_preempt p;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
if (!ops || !ops->pr_preempt)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
if (copy_from_user(&p, arg, sizeof(p)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (p.flags)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int blkdev_pr_clear(struct block_device *bdev,
|
||||||
|
struct pr_clear __user *arg)
|
||||||
|
{
|
||||||
|
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
|
||||||
|
struct pr_clear c;
|
||||||
|
|
||||||
|
if (!capable(CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
if (!ops || !ops->pr_clear)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
if (copy_from_user(&c, arg, sizeof(c)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
if (c.flags)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return ops->pr_clear(bdev, c.key);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is it an unrecognized ioctl? The correct returns are either
|
* Is it an unrecognized ioctl? The correct returns are either
|
||||||
* ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
|
* ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
|
||||||
@ -477,6 +568,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
|
|||||||
case BLKTRACESETUP:
|
case BLKTRACESETUP:
|
||||||
case BLKTRACETEARDOWN:
|
case BLKTRACETEARDOWN:
|
||||||
return blk_trace_ioctl(bdev, cmd, argp);
|
return blk_trace_ioctl(bdev, cmd, argp);
|
||||||
|
case IOC_PR_REGISTER:
|
||||||
|
return blkdev_pr_register(bdev, argp);
|
||||||
|
case IOC_PR_RESERVE:
|
||||||
|
return blkdev_pr_reserve(bdev, argp);
|
||||||
|
case IOC_PR_RELEASE:
|
||||||
|
return blkdev_pr_release(bdev, argp);
|
||||||
|
case IOC_PR_PREEMPT:
|
||||||
|
return blkdev_pr_preempt(bdev, argp, false);
|
||||||
|
case IOC_PR_PREEMPT_ABORT:
|
||||||
|
return blkdev_pr_preempt(bdev, argp, true);
|
||||||
|
case IOC_PR_CLEAR:
|
||||||
|
return blkdev_pr_clear(bdev, argp);
|
||||||
default:
|
default:
|
||||||
return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
|
return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@ struct sg_io_hdr;
|
|||||||
struct bsg_job;
|
struct bsg_job;
|
||||||
struct blkcg_gq;
|
struct blkcg_gq;
|
||||||
struct blk_flush_queue;
|
struct blk_flush_queue;
|
||||||
|
struct pr_ops;
|
||||||
|
|
||||||
#define BLKDEV_MIN_RQ 4
|
#define BLKDEV_MIN_RQ 4
|
||||||
#define BLKDEV_MAX_RQ 128 /* Default maximum */
|
#define BLKDEV_MAX_RQ 128 /* Default maximum */
|
||||||
@ -1633,6 +1634,7 @@ struct block_device_operations {
|
|||||||
/* this callback is with swap_lock and sometimes page table lock held */
|
/* this callback is with swap_lock and sometimes page table lock held */
|
||||||
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
|
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
|
||||||
struct module *owner;
|
struct module *owner;
|
||||||
|
const struct pr_ops *pr_ops;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
|
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
|
||||||
|
18
include/linux/pr.h
Normal file
18
include/linux/pr.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef LINUX_PR_H
|
||||||
|
#define LINUX_PR_H
|
||||||
|
|
||||||
|
#include <uapi/linux/pr.h>
|
||||||
|
|
||||||
|
/*
 * Driver methods for Persistent Reservations.
 *
 * A driver exposes these through the pr_ops pointer in its
 * struct block_device_operations.  The block layer ioctl handlers in
 * block/ioctl.c return -EOPNOTSUPP when the pointer, or the individual
 * method needed for an ioctl, is NULL.
 *
 * pr_register: called for IOC_PR_REGISTER with the old and new key and
 *              the user-supplied flags.
 * pr_reserve:  called for IOC_PR_RESERVE with key, type and flags.
 * pr_release:  called for IOC_PR_RELEASE with key and type.
 * pr_preempt:  called for IOC_PR_PREEMPT (abort == false) and
 *              IOC_PR_PREEMPT_ABORT (abort == true).
 * pr_clear:    called for IOC_PR_CLEAR with the key.
 *
 * Each method's return value is passed back as the ioctl result.
 */
struct pr_ops {
|
||||||
|
int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
|
||||||
|
u32 flags);
|
||||||
|
int (*pr_reserve)(struct block_device *bdev, u64 key,
|
||||||
|
enum pr_type type, u32 flags);
|
||||||
|
int (*pr_release)(struct block_device *bdev, u64 key,
|
||||||
|
enum pr_type type);
|
||||||
|
int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
|
||||||
|
enum pr_type type, bool abort);
|
||||||
|
int (*pr_clear)(struct block_device *bdev, u64 key);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* LINUX_PR_H */
|
48
include/uapi/linux/pr.h
Normal file
48
include/uapi/linux/pr.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#ifndef _UAPI_PR_H
|
||||||
|
#define _UAPI_PR_H
|
||||||
|
|
||||||
|
/*
 * Reservation types, as described in Documentation/block/pr.txt:
 *
 * PR_WRITE_EXCLUSIVE:           only the initiator owning the reservation
 *                               can write; any initiator can read.
 * PR_EXCLUSIVE_ACCESS:          only the initiator owning the reservation
 *                               can access the device.
 * PR_WRITE_EXCLUSIVE_REG_ONLY:  only initiators with a registered key can
 *                               write; any initiator can read.
 * PR_EXCLUSIVE_ACCESS_REG_ONLY: only initiators with a registered key can
 *                               access the device.
 * PR_WRITE_EXCLUSIVE_ALL_REGS:  like PR_WRITE_EXCLUSIVE_REG_ONLY, and all
 *                               registered initiators are reservation
 *                               holders.
 * PR_EXCLUSIVE_ACCESS_ALL_REGS: like PR_EXCLUSIVE_ACCESS_REG_ONLY, and all
 *                               registered initiators are reservation
 *                               holders.
 */
enum pr_type {
|
||||||
|
PR_WRITE_EXCLUSIVE = 1,
|
||||||
|
PR_EXCLUSIVE_ACCESS = 2,
|
||||||
|
PR_WRITE_EXCLUSIVE_REG_ONLY = 3,
|
||||||
|
PR_EXCLUSIVE_ACCESS_REG_ONLY = 4,
|
||||||
|
PR_WRITE_EXCLUSIVE_ALL_REGS = 5,
|
||||||
|
PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Argument for IOC_PR_RESERVE and IOC_PR_RELEASE.
 *
 * key:   reservation key handed to the driver.
 * type:  reservation type; the block layer passes it to the driver as an
 *        enum pr_type.
 * flags: IOC_PR_RESERVE accepts only PR_FL_IGNORE_KEY; IOC_PR_RELEASE
 *        requires this to be zero.
 */
struct pr_reservation {
|
||||||
|
__u64 key;
|
||||||
|
__u32 type;
|
||||||
|
__u32 flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Argument for IOC_PR_REGISTER.
 *
 * old_key: existing reservation key (see Documentation/block/pr.txt).
 * new_key: key to register.
 * flags:   only PR_FL_IGNORE_KEY is accepted.
 * __pad:   not examined by the block layer handler; presumably explicit
 *          padding to keep the layout free of implicit tail padding
 *          (TODO confirm).
 */
struct pr_registration {
|
||||||
|
__u64 old_key;
|
||||||
|
__u64 new_key;
|
||||||
|
__u32 flags;
|
||||||
|
__u32 __pad;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Argument for IOC_PR_PREEMPT and IOC_PR_PREEMPT_ABORT.
 *
 * old_key: key of the reservation being preempted.
 * new_key: key the new reservation is established for.
 * type:    reservation type; passed to the driver as an enum pr_type.
 * flags:   must be zero; the block layer rejects any set bit with
 *          -EOPNOTSUPP.
 */
struct pr_preempt {
|
||||||
|
__u64 old_key;
|
||||||
|
__u64 new_key;
|
||||||
|
__u32 type;
|
||||||
|
__u32 flags;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Argument for IOC_PR_CLEAR.
 *
 * key:   reservation key handed to the driver.
 * flags: must be zero; the block layer rejects any set bit with
 *        -EOPNOTSUPP.
 * __pad: not examined by the block layer handler; presumably explicit
 *        padding to keep the layout free of implicit tail padding
 *        (TODO confirm).
 */
struct pr_clear {
|
||||||
|
__u64 key;
|
||||||
|
__u32 flags;
|
||||||
|
__u32 __pad;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Flag bits for the flags field of the ioctl argument structures.
 * The block layer accepts PR_FL_IGNORE_KEY for IOC_PR_REGISTER and
 * IOC_PR_RESERVE; any other set bit is rejected with -EOPNOTSUPP.
 */
#define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */
|
||||||
|
|
||||||
|
/*
 * Persistent Reservation ioctl numbers.  All of them pass a structure
 * from user space to the kernel (_IOW); IOC_PR_RESERVE/IOC_PR_RELEASE and
 * IOC_PR_PREEMPT/IOC_PR_PREEMPT_ABORT share their argument structures.
 */
#define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration)
|
||||||
|
#define IOC_PR_RESERVE _IOW('p', 201, struct pr_reservation)
|
||||||
|
#define IOC_PR_RELEASE _IOW('p', 202, struct pr_reservation)
|
||||||
|
#define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt)
|
||||||
|
#define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt)
|
||||||
|
#define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear)
|
||||||
|
|
||||||
|
#endif /* _UAPI_PR_H */
|
Loading…
Reference in New Issue
Block a user