2009-06-22 10:12:27 +01:00
|
|
|
/*
 * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved.
 * Copyright (C) 2006-2009 NEC Corporation.
 *
 * dm-queue-length.c
 *
 * Module Author: Stefan Bader, IBM
 * Modified by: Kiyoshi Ueda, NEC
 *
 * This file is released under the GPL.
 *
 * queue-length path selector - choose a path with the least number of
 * in-flight I/Os.
 */
|
|
|
|
|
|
|
|
#include "dm.h"
|
|
|
|
#include "dm-path-selector.h"
|
|
|
|
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/ctype.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/module.h>
|
2011-07-26 16:09:06 -07:00
|
|
|
#include <linux/atomic.h>
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
/* Prefix picked up by the DM logging macros used in this file. */
#define DM_MSG_PREFIX	"multipath queue-length"

/* Default repeat count applied when a path is added without an argument. */
#define QL_MIN_IO	1

/* Version string reported when the module is loaded. */
#define QL_VERSION	"0.2.0"
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
struct selector {
|
|
|
|
struct list_head valid_paths;
|
|
|
|
struct list_head failed_paths;
|
2016-02-15 14:25:00 -05:00
|
|
|
spinlock_t lock;
|
2009-06-22 10:12:27 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
struct path_info {
|
|
|
|
struct list_head list;
|
|
|
|
struct dm_path *path;
|
|
|
|
unsigned repeat_count;
|
|
|
|
atomic_t qlen; /* the number of in-flight I/Os */
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct selector *alloc_selector(void)
|
|
|
|
{
|
|
|
|
struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
|
|
|
|
|
|
|
if (s) {
|
|
|
|
INIT_LIST_HEAD(&s->valid_paths);
|
|
|
|
INIT_LIST_HEAD(&s->failed_paths);
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_lock_init(&s->lock);
|
2009-06-22 10:12:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_create(struct path_selector *ps, unsigned argc, char **argv)
|
|
|
|
{
|
|
|
|
struct selector *s = alloc_selector();
|
|
|
|
|
|
|
|
if (!s)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
ps->context = s;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ql_free_paths(struct list_head *paths)
|
|
|
|
{
|
|
|
|
struct path_info *pi, *next;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pi, next, paths, list) {
|
|
|
|
list_del(&pi->list);
|
|
|
|
kfree(pi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ql_destroy(struct path_selector *ps)
|
|
|
|
{
|
|
|
|
struct selector *s = ps->context;
|
|
|
|
|
|
|
|
ql_free_paths(&s->valid_paths);
|
|
|
|
ql_free_paths(&s->failed_paths);
|
|
|
|
kfree(s);
|
|
|
|
ps->context = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_status(struct path_selector *ps, struct dm_path *path,
|
|
|
|
status_type_t type, char *result, unsigned maxlen)
|
|
|
|
{
|
|
|
|
unsigned sz = 0;
|
|
|
|
struct path_info *pi;
|
|
|
|
|
|
|
|
/* When called with NULL path, return selector status/args. */
|
|
|
|
if (!path)
|
|
|
|
DMEMIT("0 ");
|
|
|
|
else {
|
|
|
|
pi = path->pscontext;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case STATUSTYPE_INFO:
|
|
|
|
DMEMIT("%d ", atomic_read(&pi->qlen));
|
|
|
|
break;
|
|
|
|
case STATUSTYPE_TABLE:
|
|
|
|
DMEMIT("%u ", pi->repeat_count);
|
|
|
|
break;
|
2021-07-12 17:49:03 -07:00
|
|
|
case STATUSTYPE_IMA:
|
|
|
|
*result = '\0';
|
|
|
|
break;
|
2009-06-22 10:12:27 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return sz;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_add_path(struct path_selector *ps, struct dm_path *path,
|
|
|
|
int argc, char **argv, char **error)
|
|
|
|
{
|
|
|
|
struct selector *s = ps->context;
|
|
|
|
struct path_info *pi;
|
|
|
|
unsigned repeat_count = QL_MIN_IO;
|
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-28 18:41:26 +01:00
|
|
|
char dummy;
|
2016-02-15 14:25:00 -05:00
|
|
|
unsigned long flags;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Arguments: [<repeat_count>]
|
|
|
|
* <repeat_count>: The number of I/Os before switching path.
|
|
|
|
* If not given, default (QL_MIN_IO) is used.
|
|
|
|
*/
|
|
|
|
if (argc > 1) {
|
|
|
|
*error = "queue-length ps: incorrect number of arguments";
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-28 18:41:26 +01:00
|
|
|
if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
|
2009-06-22 10:12:27 +01:00
|
|
|
*error = "queue-length ps: invalid repeat count";
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2016-02-10 11:58:45 -05:00
|
|
|
if (repeat_count > 1) {
|
|
|
|
DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
|
|
|
|
repeat_count = 1;
|
|
|
|
}
|
|
|
|
|
2009-06-22 10:12:27 +01:00
|
|
|
/* Allocate the path information structure */
|
|
|
|
pi = kmalloc(sizeof(*pi), GFP_KERNEL);
|
|
|
|
if (!pi) {
|
|
|
|
*error = "queue-length ps: Error allocating path information";
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
pi->path = path;
|
|
|
|
pi->repeat_count = repeat_count;
|
|
|
|
atomic_set(&pi->qlen, 0);
|
|
|
|
|
|
|
|
path->pscontext = pi;
|
|
|
|
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_lock_irqsave(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
list_add_tail(&pi->list, &s->valid_paths);
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_unlock_irqrestore(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ql_fail_path(struct path_selector *ps, struct dm_path *path)
|
|
|
|
{
|
|
|
|
struct selector *s = ps->context;
|
|
|
|
struct path_info *pi = path->pscontext;
|
2016-02-15 14:25:00 -05:00
|
|
|
unsigned long flags;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_lock_irqsave(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
list_move(&pi->list, &s->failed_paths);
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_unlock_irqrestore(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path)
|
|
|
|
{
|
|
|
|
struct selector *s = ps->context;
|
|
|
|
struct path_info *pi = path->pscontext;
|
2016-02-15 14:25:00 -05:00
|
|
|
unsigned long flags;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_lock_irqsave(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
list_move_tail(&pi->list, &s->valid_paths);
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_unlock_irqrestore(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Select a path having the minimum number of in-flight I/Os
|
|
|
|
*/
|
2016-02-17 21:29:17 -05:00
|
|
|
static struct dm_path *ql_select_path(struct path_selector *ps, size_t nr_bytes)
|
2009-06-22 10:12:27 +01:00
|
|
|
{
|
|
|
|
struct selector *s = ps->context;
|
|
|
|
struct path_info *pi = NULL, *best = NULL;
|
2016-02-15 14:25:00 -05:00
|
|
|
struct dm_path *ret = NULL;
|
|
|
|
unsigned long flags;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
2016-02-15 14:25:00 -05:00
|
|
|
spin_lock_irqsave(&s->lock, flags);
|
2009-06-22 10:12:27 +01:00
|
|
|
if (list_empty(&s->valid_paths))
|
2016-02-15 14:25:00 -05:00
|
|
|
goto out;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
|
|
|
list_for_each_entry(pi, &s->valid_paths, list) {
|
|
|
|
if (!best ||
|
|
|
|
(atomic_read(&pi->qlen) < atomic_read(&best->qlen)))
|
|
|
|
best = pi;
|
|
|
|
|
|
|
|
if (!atomic_read(&best->qlen))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!best)
|
2016-02-15 14:25:00 -05:00
|
|
|
goto out;
|
2009-06-22 10:12:27 +01:00
|
|
|
|
dm mpath selector: more evenly distribute ties
Move the last used path to the end of the list (least preferred) so that
ties are more evenly distributed.
For example, in case with three paths with one that is slower than
others, the remaining two would be unevenly used if they tie. This is
due to the rotation not being a truely fair distribution.
Illustrated: paths a, b, c, 'c' has 1 outstanding IO, a and b are 'tied'
Three possible rotations:
(a, b, c) -> best path 'a'
(b, c, a) -> best path 'b'
(c, a, b) -> best path 'a'
(a, b, c) -> best path 'a'
(b, c, a) -> best path 'b'
(c, a, b) -> best path 'a'
...
So 'a' is used 2x more than 'b', although they should be used evenly.
With this change, the most recently used path is always the least
preferred, removing this bias resulting in even distribution.
(a, b, c) -> best path 'a'
(b, c, a) -> best path 'b'
(c, a, b) -> best path 'a'
(c, b, a) -> best path 'b'
...
Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2018-01-19 15:07:37 -08:00
|
|
|
/* Move most recently used to least preferred to evenly balance. */
|
|
|
|
list_move_tail(&best->list, &s->valid_paths);
|
|
|
|
|
2016-02-15 14:25:00 -05:00
|
|
|
ret = best->path;
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&s->lock, flags);
|
|
|
|
return ret;
|
2009-06-22 10:12:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_start_io(struct path_selector *ps, struct dm_path *path,
|
|
|
|
size_t nr_bytes)
|
|
|
|
{
|
|
|
|
struct path_info *pi = path->pscontext;
|
|
|
|
|
|
|
|
atomic_inc(&pi->qlen);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ql_end_io(struct path_selector *ps, struct dm_path *path,
|
2020-04-30 16:48:29 -04:00
|
|
|
size_t nr_bytes, u64 start_time)
|
2009-06-22 10:12:27 +01:00
|
|
|
{
|
|
|
|
struct path_info *pi = path->pscontext;
|
|
|
|
|
|
|
|
atomic_dec(&pi->qlen);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct path_selector_type ql_ps = {
|
|
|
|
.name = "queue-length",
|
|
|
|
.module = THIS_MODULE,
|
|
|
|
.table_args = 1,
|
|
|
|
.info_args = 1,
|
|
|
|
.create = ql_create,
|
|
|
|
.destroy = ql_destroy,
|
|
|
|
.status = ql_status,
|
|
|
|
.add_path = ql_add_path,
|
|
|
|
.fail_path = ql_fail_path,
|
|
|
|
.reinstate_path = ql_reinstate_path,
|
|
|
|
.select_path = ql_select_path,
|
|
|
|
.start_io = ql_start_io,
|
|
|
|
.end_io = ql_end_io,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Module entry point: register the queue-length path selector type.
 *
 * Returns the value of dm_register_path_selector().  A negative result is
 * logged and returned immediately, so the "loaded" message is only printed
 * when registration did not fail.
 */
static int __init dm_ql_init(void)
{
	int r = dm_register_path_selector(&ql_ps);

	if (r < 0) {
		DMERR("register failed %d", r);
		return r;
	}

	DMINFO("version " QL_VERSION " loaded");

	return r;
}
|
|
|
|
|
|
|
|
/*
 * Module exit point: unregister the queue-length path selector type and
 * log an error if unregistration reports a negative result.
 */
static void __exit dm_ql_exit(void)
{
	int r;

	r = dm_unregister_path_selector(&ql_ps);
	if (r < 0)
		DMERR("unregister failed %d", r);
}
|
|
|
|
|
|
|
|
module_init(dm_ql_init);
|
|
|
|
module_exit(dm_ql_exit);
|
|
|
|
|
|
|
|
MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>");
|
|
|
|
MODULE_DESCRIPTION(
|
|
|
|
"(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n"
|
|
|
|
DM_NAME " path selector to balance the number of in-flight I/Os"
|
|
|
|
);
|
|
|
|
MODULE_LICENSE("GPL");
|