2013-07-10 23:41:19 +01:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2010-2012 by Dell Inc. All rights reserved.
|
|
|
|
* Copyright (C) 2011-2013 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* This file is released under the GPL.
|
|
|
|
*
|
|
|
|
* dm-switch is a device-mapper target that maps IO to underlying block
|
|
|
|
* devices efficiently when there are a large number of fixed-sized
|
|
|
|
* address regions but there is no simple pattern to allow for a compact
|
|
|
|
* mapping representation such as dm-stripe.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/device-mapper.h>
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
|
|
|
|
#define DM_MSG_PREFIX "switch"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* One region_table_slot_t holds <region_entries_per_slot> region table
|
|
|
|
* entries each of which is <region_table_entry_bits> in size.
|
|
|
|
*/
|
|
|
|
typedef unsigned long region_table_slot_t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A device with the offset to its start sector.
|
|
|
|
*/
|
|
|
|
struct switch_path {
|
|
|
|
struct dm_dev *dmdev;
|
|
|
|
sector_t start;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Context block for a dm switch device.
|
|
|
|
*/
|
|
|
|
struct switch_ctx {
|
|
|
|
struct dm_target *ti;
|
|
|
|
|
|
|
|
unsigned nr_paths; /* Number of paths in path_list. */
|
|
|
|
|
|
|
|
unsigned region_size; /* Region size in 512-byte sectors */
|
|
|
|
unsigned long nr_regions; /* Number of regions making up the device */
|
|
|
|
signed char region_size_bits; /* log2 of region_size or -1 */
|
|
|
|
|
|
|
|
unsigned char region_table_entry_bits; /* Number of bits in one region table entry */
|
|
|
|
unsigned char region_entries_per_slot; /* Number of entries in one region table slot */
|
|
|
|
signed char region_entries_per_slot_bits; /* log2 of region_entries_per_slot or -1 */
|
|
|
|
|
|
|
|
region_table_slot_t *region_table; /* Region table */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Array of dm devices to switch between.
|
|
|
|
*/
|
|
|
|
struct switch_path path_list[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_paths,
|
|
|
|
unsigned region_size)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx;
|
|
|
|
|
2019-01-08 10:02:33 -06:00
|
|
|
sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL);
|
2013-07-10 23:41:19 +01:00
|
|
|
if (!sctx)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
sctx->ti = ti;
|
|
|
|
sctx->region_size = region_size;
|
|
|
|
|
|
|
|
ti->private = sctx;
|
|
|
|
|
|
|
|
return sctx;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int alloc_region_table(struct dm_target *ti, unsigned nr_paths)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
sector_t nr_regions = ti->len;
|
|
|
|
sector_t nr_slots;
|
|
|
|
|
|
|
|
if (!(sctx->region_size & (sctx->region_size - 1)))
|
|
|
|
sctx->region_size_bits = __ffs(sctx->region_size);
|
|
|
|
else
|
|
|
|
sctx->region_size_bits = -1;
|
|
|
|
|
|
|
|
sctx->region_table_entry_bits = 1;
|
|
|
|
while (sctx->region_table_entry_bits < sizeof(region_table_slot_t) * 8 &&
|
|
|
|
(region_table_slot_t)1 << sctx->region_table_entry_bits < nr_paths)
|
|
|
|
sctx->region_table_entry_bits++;
|
|
|
|
|
|
|
|
sctx->region_entries_per_slot = (sizeof(region_table_slot_t) * 8) / sctx->region_table_entry_bits;
|
|
|
|
if (!(sctx->region_entries_per_slot & (sctx->region_entries_per_slot - 1)))
|
|
|
|
sctx->region_entries_per_slot_bits = __ffs(sctx->region_entries_per_slot);
|
|
|
|
else
|
|
|
|
sctx->region_entries_per_slot_bits = -1;
|
|
|
|
|
|
|
|
if (sector_div(nr_regions, sctx->region_size))
|
|
|
|
nr_regions++;
|
|
|
|
|
2015-10-29 03:54:21 +09:00
|
|
|
if (nr_regions >= ULONG_MAX) {
|
2013-07-10 23:41:19 +01:00
|
|
|
ti->error = "Region table too large";
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2015-10-29 03:54:21 +09:00
|
|
|
sctx->nr_regions = nr_regions;
|
2013-07-10 23:41:19 +01:00
|
|
|
|
|
|
|
nr_slots = nr_regions;
|
|
|
|
if (sector_div(nr_slots, sctx->region_entries_per_slot))
|
|
|
|
nr_slots++;
|
|
|
|
|
|
|
|
if (nr_slots > ULONG_MAX / sizeof(region_table_slot_t)) {
|
|
|
|
ti->error = "Region table too large";
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
treewide: Use array_size() in vmalloc()
The vmalloc() function has no 2-factor argument form, so multiplication
factors need to be wrapped in array_size(). This patch replaces cases of:
vmalloc(a * b)
with:
vmalloc(array_size(a, b))
as well as handling cases of:
vmalloc(a * b * c)
with:
vmalloc(array3_size(a, b, c))
This does, however, attempt to ignore constant size factors like:
vmalloc(4 * 1024)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
vmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
vmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
vmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
vmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
vmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
vmalloc(
- sizeof(TYPE) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT_ID
+ array_size(COUNT_ID, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT_ID)
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT_ID
+ array_size(COUNT_ID, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT_CONST)
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT_CONST
+ array_size(COUNT_CONST, sizeof(THING))
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
vmalloc(
- SIZE * COUNT
+ array_size(COUNT, SIZE)
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
vmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
vmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
vmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
vmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
vmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
vmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
vmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
vmalloc(C1 * C2 * C3, ...)
|
vmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants.
@@
expression E1, E2;
constant C1, C2;
@@
(
vmalloc(C1 * C2, ...)
|
vmalloc(
- E1 * E2
+ array_size(E1, E2)
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:27:11 -07:00
|
|
|
sctx->region_table = vmalloc(array_size(nr_slots,
|
|
|
|
sizeof(region_table_slot_t)));
|
2013-07-10 23:41:19 +01:00
|
|
|
if (!sctx->region_table) {
|
|
|
|
ti->error = "Cannot allocate region table";
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr,
|
|
|
|
unsigned long *region_index, unsigned *bit)
|
|
|
|
{
|
|
|
|
if (sctx->region_entries_per_slot_bits >= 0) {
|
|
|
|
*region_index = region_nr >> sctx->region_entries_per_slot_bits;
|
|
|
|
*bit = region_nr & (sctx->region_entries_per_slot - 1);
|
|
|
|
} else {
|
|
|
|
*region_index = region_nr / sctx->region_entries_per_slot;
|
|
|
|
*bit = region_nr % sctx->region_entries_per_slot;
|
|
|
|
}
|
|
|
|
|
|
|
|
*bit *= sctx->region_table_entry_bits;
|
|
|
|
}
|
|
|
|
|
2014-07-28 17:49:41 -04:00
|
|
|
static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
|
|
|
|
{
|
|
|
|
unsigned long region_index;
|
|
|
|
unsigned bit;
|
|
|
|
|
|
|
|
switch_get_position(sctx, region_nr, ®ion_index, &bit);
|
|
|
|
|
locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE()
Please do not apply this to mainline directly, instead please re-run the
coccinelle script shown below and apply its output.
For several reasons, it is desirable to use {READ,WRITE}_ONCE() in
preference to ACCESS_ONCE(), and new code is expected to use one of the
former. So far, there's been no reason to change most existing uses of
ACCESS_ONCE(), as these aren't harmful, and changing them results in
churn.
However, for some features, the read/write distinction is critical to
correct operation. To distinguish these cases, separate read/write
accessors must be used. This patch migrates (most) remaining
ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following
coccinelle script:
----
// Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and
// WRITE_ONCE()
// $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch
virtual patch
@ depends on patch @
expression E1, E2;
@@
- ACCESS_ONCE(E1) = E2
+ WRITE_ONCE(E1, E2)
@ depends on patch @
expression E;
@@
- ACCESS_ONCE(E)
+ READ_ONCE(E)
----
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: davem@davemloft.net
Cc: linux-arch@vger.kernel.org
Cc: mpe@ellerman.id.au
Cc: shuah@kernel.org
Cc: snitzer@redhat.com
Cc: thor.thayer@linux.intel.com
Cc: tj@kernel.org
Cc: viro@zeniv.linux.org.uk
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-10-23 14:07:29 -07:00
|
|
|
return (READ_ONCE(sctx->region_table[region_index]) >> bit) &
|
2014-07-28 17:49:41 -04:00
|
|
|
((1 << sctx->region_table_entry_bits) - 1);
|
|
|
|
}
|
|
|
|
|
2013-07-10 23:41:19 +01:00
|
|
|
/*
|
|
|
|
* Find which path to use at given offset.
|
|
|
|
*/
|
|
|
|
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
|
|
|
|
{
|
2014-07-28 17:49:41 -04:00
|
|
|
unsigned path_nr;
|
2013-07-10 23:41:19 +01:00
|
|
|
sector_t p;
|
|
|
|
|
|
|
|
p = offset;
|
|
|
|
if (sctx->region_size_bits >= 0)
|
|
|
|
p >>= sctx->region_size_bits;
|
|
|
|
else
|
|
|
|
sector_div(p, sctx->region_size);
|
|
|
|
|
2014-07-28 17:49:41 -04:00
|
|
|
path_nr = switch_region_table_read(sctx, p);
|
2013-07-10 23:41:19 +01:00
|
|
|
|
|
|
|
/* This can only happen if the processor uses non-atomic stores. */
|
|
|
|
if (unlikely(path_nr >= sctx->nr_paths))
|
|
|
|
path_nr = 0;
|
|
|
|
|
|
|
|
return path_nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void switch_region_table_write(struct switch_ctx *sctx, unsigned long region_nr,
|
|
|
|
unsigned value)
|
|
|
|
{
|
|
|
|
unsigned long region_index;
|
|
|
|
unsigned bit;
|
|
|
|
region_table_slot_t pte;
|
|
|
|
|
|
|
|
switch_get_position(sctx, region_nr, ®ion_index, &bit);
|
|
|
|
|
|
|
|
pte = sctx->region_table[region_index];
|
|
|
|
pte &= ~((((region_table_slot_t)1 << sctx->region_table_entry_bits) - 1) << bit);
|
|
|
|
pte |= (region_table_slot_t)value << bit;
|
|
|
|
sctx->region_table[region_index] = pte;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill the region table with an initial round robin pattern.
|
|
|
|
*/
|
|
|
|
static void initialise_region_table(struct switch_ctx *sctx)
|
|
|
|
{
|
|
|
|
unsigned path_nr = 0;
|
|
|
|
unsigned long region_nr;
|
|
|
|
|
|
|
|
for (region_nr = 0; region_nr < sctx->nr_regions; region_nr++) {
|
|
|
|
switch_region_table_write(sctx, region_nr, path_nr);
|
|
|
|
if (++path_nr >= sctx->nr_paths)
|
|
|
|
path_nr = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_path(struct dm_arg_set *as, struct dm_target *ti)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
unsigned long long start;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
|
|
|
|
&sctx->path_list[sctx->nr_paths].dmdev);
|
|
|
|
if (r) {
|
|
|
|
ti->error = "Device lookup failed";
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (kstrtoull(dm_shift_arg(as), 10, &start) || start != (sector_t)start) {
|
|
|
|
ti->error = "Invalid device starting offset";
|
|
|
|
dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
sctx->path_list[sctx->nr_paths].start = start;
|
|
|
|
|
|
|
|
sctx->nr_paths++;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Destructor: Don't free the dm_target, just the ti->private data (if any).
|
|
|
|
*/
|
|
|
|
static void switch_dtr(struct dm_target *ti)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
|
|
|
|
while (sctx->nr_paths--)
|
|
|
|
dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);
|
|
|
|
|
|
|
|
vfree(sctx->region_table);
|
|
|
|
kfree(sctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Constructor arguments:
|
|
|
|
* <num_paths> <region_size> <num_optional_args> [<optional_args>...]
|
|
|
|
* [<dev_path> <offset>]+
|
|
|
|
*
|
|
|
|
* Optional args are to allow for future extension: currently this
|
|
|
|
* parameter must be 0.
|
|
|
|
*/
|
|
|
|
static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
|
{
|
2017-06-22 11:32:45 -07:00
|
|
|
static const struct dm_arg _args[] = {
|
2013-07-10 23:41:19 +01:00
|
|
|
{1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"},
|
|
|
|
{1, UINT_MAX, "Invalid region size"},
|
|
|
|
{0, 0, "Invalid number of optional args"},
|
|
|
|
};
|
|
|
|
|
|
|
|
struct switch_ctx *sctx;
|
|
|
|
struct dm_arg_set as;
|
|
|
|
unsigned nr_paths, region_size, nr_optional_args;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
as.argc = argc;
|
|
|
|
as.argv = argv;
|
|
|
|
|
|
|
|
r = dm_read_arg(_args, &as, &nr_paths, &ti->error);
|
|
|
|
if (r)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
r = dm_read_arg(_args + 1, &as, ®ion_size, &ti->error);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
r = dm_read_arg_group(_args + 2, &as, &nr_optional_args, &ti->error);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
/* parse optional arguments here, if we add any */
|
|
|
|
|
|
|
|
if (as.argc != nr_paths * 2) {
|
|
|
|
ti->error = "Incorrect number of path arguments";
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
sctx = alloc_switch_ctx(ti, nr_paths, region_size);
|
|
|
|
if (!sctx) {
|
|
|
|
ti->error = "Cannot allocate redirection context";
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = dm_set_target_max_io_len(ti, region_size);
|
|
|
|
if (r)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
while (as.argc) {
|
|
|
|
r = parse_path(&as, ti);
|
|
|
|
if (r)
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = alloc_region_table(ti, nr_paths);
|
|
|
|
if (r)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
initialise_region_table(sctx);
|
|
|
|
|
|
|
|
/* For UNMAP, sending the request down any path is sufficient */
|
|
|
|
ti->num_discard_bios = 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
switch_dtr(ti);
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int switch_map(struct dm_target *ti, struct bio *bio)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
2013-10-11 15:44:27 -07:00
|
|
|
sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
|
2013-07-10 23:41:19 +01:00
|
|
|
unsigned path_nr = switch_get_path_nr(sctx, offset);
|
|
|
|
|
2017-08-23 19:10:32 +02:00
|
|
|
bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev);
|
2013-10-11 15:44:27 -07:00
|
|
|
bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;
|
2013-07-10 23:41:19 +01:00
|
|
|
|
|
|
|
return DM_MAPIO_REMAPPED;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to parse hex numbers in the message as quickly as possible.
|
|
|
|
*
|
|
|
|
* This table-based hex parser improves performance.
|
|
|
|
* It improves a time to load 1000000 entries compared to the condition-based
|
|
|
|
* parser.
|
|
|
|
* table-based parser condition-based parser
|
|
|
|
* PA-RISC 0.29s 0.31s
|
|
|
|
* Opteron 0.0495s 0.0498s
|
|
|
|
*/
|
|
|
|
static const unsigned char hex_table[256] = {
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
|
|
|
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
|
|
|
};
|
|
|
|
|
|
|
|
static __always_inline unsigned long parse_hex(const char **string)
|
|
|
|
{
|
|
|
|
unsigned char d;
|
|
|
|
unsigned long r = 0;
|
|
|
|
|
|
|
|
while ((d = hex_table[(unsigned char)**string]) < 16) {
|
|
|
|
r = (r << 4) | d;
|
|
|
|
(*string)++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int process_set_region_mappings(struct switch_ctx *sctx,
|
2014-07-28 18:11:25 -04:00
|
|
|
unsigned argc, char **argv)
|
2013-07-10 23:41:19 +01:00
|
|
|
{
|
|
|
|
unsigned i;
|
|
|
|
unsigned long region_index = 0;
|
|
|
|
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
unsigned long path_nr;
|
|
|
|
const char *string = argv[i];
|
|
|
|
|
2014-07-28 18:11:25 -04:00
|
|
|
if ((*string & 0xdf) == 'R') {
|
|
|
|
unsigned long cycle_length, num_write;
|
|
|
|
|
|
|
|
string++;
|
|
|
|
if (unlikely(*string == ',')) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
cycle_length = parse_hex(&string);
|
|
|
|
if (unlikely(*string != ',')) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
string++;
|
|
|
|
if (unlikely(!*string)) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
num_write = parse_hex(&string);
|
|
|
|
if (unlikely(*string)) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
|
|
|
|
DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
|
|
|
|
cycle_length - 1, region_index);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (unlikely(region_index + num_write < region_index) ||
|
|
|
|
unlikely(region_index + num_write >= sctx->nr_regions)) {
|
|
|
|
DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
|
|
|
|
region_index, num_write, sctx->nr_regions);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (num_write--) {
|
|
|
|
region_index++;
|
|
|
|
path_nr = switch_region_table_read(sctx, region_index - cycle_length);
|
|
|
|
switch_region_table_write(sctx, region_index, path_nr);
|
|
|
|
}
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2013-07-10 23:41:19 +01:00
|
|
|
if (*string == ':')
|
|
|
|
region_index++;
|
|
|
|
else {
|
|
|
|
region_index = parse_hex(&string);
|
|
|
|
if (unlikely(*string != ':')) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
string++;
|
|
|
|
if (unlikely(!*string)) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
path_nr = parse_hex(&string);
|
|
|
|
if (unlikely(*string)) {
|
|
|
|
DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (unlikely(region_index >= sctx->nr_regions)) {
|
|
|
|
DMWARN("invalid set_region_mappings region number: %lu >= %lu", region_index, sctx->nr_regions);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (unlikely(path_nr >= sctx->nr_paths)) {
|
|
|
|
DMWARN("invalid set_region_mappings device: %lu >= %u", path_nr, sctx->nr_paths);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch_region_table_write(sctx, region_index, path_nr);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Messages are processed one-at-a-time.
|
|
|
|
*
|
|
|
|
* Only set_region_mappings is supported.
|
|
|
|
*/
|
2018-02-28 15:59:59 -05:00
|
|
|
static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
|
|
|
|
char *result, unsigned maxlen)
|
2013-07-10 23:41:19 +01:00
|
|
|
{
|
|
|
|
static DEFINE_MUTEX(message_mutex);
|
|
|
|
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
int r = -EINVAL;
|
|
|
|
|
|
|
|
mutex_lock(&message_mutex);
|
|
|
|
|
|
|
|
if (!strcasecmp(argv[0], "set_region_mappings"))
|
|
|
|
r = process_set_region_mappings(sctx, argc, argv);
|
|
|
|
else
|
|
|
|
DMWARN("Unrecognised message received.");
|
|
|
|
|
|
|
|
mutex_unlock(&message_mutex);
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void switch_status(struct dm_target *ti, status_type_t type,
|
|
|
|
unsigned status_flags, char *result, unsigned maxlen)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
unsigned sz = 0;
|
|
|
|
int path_nr;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case STATUSTYPE_INFO:
|
|
|
|
result[0] = '\0';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case STATUSTYPE_TABLE:
|
|
|
|
DMEMIT("%u %u 0", sctx->nr_paths, sctx->region_size);
|
|
|
|
for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++)
|
|
|
|
DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
|
|
|
|
(unsigned long long)sctx->path_list[path_nr].start);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch ioctl:
|
|
|
|
*
|
|
|
|
* Passthrough all ioctls to the path for sector 0
|
|
|
|
*/
|
2018-04-03 16:54:10 -04:00
|
|
|
static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
|
2013-07-10 23:41:19 +01:00
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
unsigned path_nr;
|
|
|
|
|
|
|
|
path_nr = switch_get_path_nr(sctx, 0);
|
|
|
|
|
2015-10-15 14:10:50 +02:00
|
|
|
*bdev = sctx->path_list[path_nr].dmdev->bdev;
|
2013-07-10 23:41:19 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Only pass ioctls through if the device sizes match exactly.
|
|
|
|
*/
|
2015-10-15 14:10:50 +02:00
|
|
|
if (ti->len + sctx->path_list[path_nr].start !=
|
|
|
|
i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
2013-07-10 23:41:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int switch_iterate_devices(struct dm_target *ti,
|
|
|
|
iterate_devices_callout_fn fn, void *data)
|
|
|
|
{
|
|
|
|
struct switch_ctx *sctx = ti->private;
|
|
|
|
int path_nr;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) {
|
|
|
|
r = fn(ti, sctx->path_list[path_nr].dmdev,
|
|
|
|
sctx->path_list[path_nr].start, ti->len, data);
|
|
|
|
if (r)
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct target_type switch_target = {
|
|
|
|
.name = "switch",
|
2014-07-28 18:11:25 -04:00
|
|
|
.version = {1, 1, 0},
|
2013-07-10 23:41:19 +01:00
|
|
|
.module = THIS_MODULE,
|
|
|
|
.ctr = switch_ctr,
|
|
|
|
.dtr = switch_dtr,
|
|
|
|
.map = switch_map,
|
|
|
|
.message = switch_message,
|
|
|
|
.status = switch_status,
|
2015-10-15 14:10:50 +02:00
|
|
|
.prepare_ioctl = switch_prepare_ioctl,
|
2013-07-10 23:41:19 +01:00
|
|
|
.iterate_devices = switch_iterate_devices,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init dm_switch_init(void)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = dm_register_target(&switch_target);
|
|
|
|
if (r < 0)
|
|
|
|
DMERR("dm_register_target() failed %d", r);
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit dm_switch_exit(void)
|
|
|
|
{
|
|
|
|
dm_unregister_target(&switch_target);
|
|
|
|
}
|
|
|
|
|
|
|
|
module_init(dm_switch_init);
|
|
|
|
module_exit(dm_switch_exit);
|
|
|
|
|
|
|
|
MODULE_DESCRIPTION(DM_NAME " dynamic path switching target");
|
|
|
|
MODULE_AUTHOR("Kevin D. O'Kelley <Kevin_OKelley@dell.com>");
|
|
|
|
MODULE_AUTHOR("Narendran Ganapathy <Narendran_Ganapathy@dell.com>");
|
|
|
|
MODULE_AUTHOR("Jim Ramsay <Jim_Ramsay@dell.com>");
|
|
|
|
MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
|
|
|
|
MODULE_LICENSE("GPL");
|