mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-11 00:08:50 +00:00
- Revert a request-based DM core change that caused IO latency to
  increase and adversely impact both throughput and system load
- Fix for a use after free bug in DM core's device cleanup
- A couple DM btree removal fixes (used by dm-thinp)
- A DM thinp fix for order-5 allocation failure
- A DM thinp fix to not degrade to read-only metadata mode when in
  out-of-data-space mode for longer than the 'no_space_timeout'
- Fix a long-standing oversight in both dm-thinp and dm-cache by now
  exporting 'needs_check' in status if it was set in metadata
- Fix an embarrassing dm-cache busy-loop that caused worker threads to
  eat cpu even if no IO was actively being issued to the cache device
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABAgAGBQJVqUfSAAoJEMUj8QotnQNa+RUH+wXrHCGI6J7RHIXVd5igP9K0
yFZGEnLZe6Ebt5CACLcKn/qN0g97iwCrlcxFt+1Gj/GbW1GIQzs7vg38La3PZxWZ
jAkI3JMY816bP1x3VK1HtMsk2gRaE/hh0gxK5pPLB9a+ZdEsz9UML0rs+JseOdn3
n+454dhwOyChwz7zFEbpn+mfjoruFScGX0Y2qaSHBV/xMhmExpthw9V1yFC2v2tW
8cAHOMDLNLHhR5adF9YxjZH8wILbyYK9oPy3iGhj/TF/Dx7saWYG4UlnL5xIOLsB
5WK9gRrJJ/Wf0FsDdN88AaY4Bdpj4esS2JeTZpvujxeBb7ZNeJoCUqyzggURv/c=
=hCjo
-----END PGP SIGNATURE-----

Merge tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- revert a request-based DM core change that caused IO latency to
  increase and adversely impact both throughput and system load
- fix for a use after free bug in DM core's device cleanup
- a couple DM btree removal fixes (used by dm-thinp)
- a DM thinp fix for order-5 allocation failure
- a DM thinp fix to not degrade to read-only metadata mode when in
  out-of-data-space mode for longer than the 'no_space_timeout'
- fix a long-standing oversight in both dm-thinp and dm-cache by now
  exporting 'needs_check' in status if it was set in metadata
- fix an embarrassing dm-cache busy-loop that caused worker threads to
  eat cpu even if no IO was actively being issued to the cache device

* tag 'dm-4.2-fixes-2' of
git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: avoid calls to prealloc_free_structs() if possible
  dm cache: avoid preallocation if no work in writeback_some_dirty_blocks()
  dm cache: do not wake_worker() in free_migration()
  dm cache: display 'needs_check' in status if it is set
  dm thin: display 'needs_check' in status if it is set
  dm thin: stay in out-of-data-space mode once no_space_timeout expires
  dm: fix use after free crash due to incorrect cleanup sequence
  Revert "dm: only run the queue on completion if congested or no requests pending"
  dm btree: silence lockdep lock inversion in dm_btree_del()
  dm thin: allocate the cell_sort_array dynamically
  dm btree remove: fix bug in redistribute3
This commit is contained in:
commit
3f8476fe89
@ -258,6 +258,12 @@ cache metadata mode : ro if read-only, rw if read-write
|
||||
no further I/O will be permitted and the status will just
|
||||
contain the string 'Fail'. The userspace recovery tools
|
||||
should then be used.
|
||||
needs_check : 'needs_check' if set, '-' if not set
|
||||
A metadata operation has failed, resulting in the needs_check
|
||||
flag being set in the metadata's superblock. The metadata
|
||||
device must be deactivated and checked/repaired before the
|
||||
cache can be made fully operational again. '-' indicates
|
||||
needs_check is not set.
|
||||
|
||||
Messages
|
||||
--------
|
||||
|
@ -296,7 +296,7 @@ ii) Status
|
||||
underlying device. When this is enabled when loading the table,
|
||||
it can get disabled if the underlying device doesn't support it.
|
||||
|
||||
ro|rw
|
||||
ro|rw|out_of_data_space
|
||||
If the pool encounters certain types of device failures it will
|
||||
drop into a read-only metadata mode in which no changes to
|
||||
the pool metadata (like allocating new blocks) are permitted.
|
||||
@ -314,6 +314,13 @@ ii) Status
|
||||
module parameter can be used to change this timeout -- it
|
||||
defaults to 60 seconds but may be disabled using a value of 0.
|
||||
|
||||
needs_check
|
||||
A metadata operation has failed, resulting in the needs_check
|
||||
flag being set in the metadata's superblock. The metadata
|
||||
device must be deactivated and checked/repaired before the
|
||||
thin-pool can be made fully operational again. '-' indicates
|
||||
needs_check is not set.
|
||||
|
||||
iii) Messages
|
||||
|
||||
create_thin <dev id>
|
||||
|
@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg)
|
||||
wake_up(&cache->migration_wait);
|
||||
|
||||
mempool_free(mg, cache->migration_pool);
|
||||
wake_worker(cache);
|
||||
}
|
||||
|
||||
static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
|
||||
@ -1947,6 +1946,7 @@ static int commit_if_needed(struct cache *cache)
|
||||
|
||||
static void process_deferred_bios(struct cache *cache)
|
||||
{
|
||||
bool prealloc_used = false;
|
||||
unsigned long flags;
|
||||
struct bio_list bios;
|
||||
struct bio *bio;
|
||||
@ -1981,13 +1981,16 @@ static void process_deferred_bios(struct cache *cache)
|
||||
process_discard_bio(cache, &structs, bio);
|
||||
else
|
||||
process_bio(cache, &structs, bio);
|
||||
prealloc_used = true;
|
||||
}
|
||||
|
||||
prealloc_free_structs(cache, &structs);
|
||||
if (prealloc_used)
|
||||
prealloc_free_structs(cache, &structs);
|
||||
}
|
||||
|
||||
static void process_deferred_cells(struct cache *cache)
|
||||
{
|
||||
bool prealloc_used = false;
|
||||
unsigned long flags;
|
||||
struct dm_bio_prison_cell *cell, *tmp;
|
||||
struct list_head cells;
|
||||
@ -2015,9 +2018,11 @@ static void process_deferred_cells(struct cache *cache)
|
||||
}
|
||||
|
||||
process_cell(cache, &structs, cell);
|
||||
prealloc_used = true;
|
||||
}
|
||||
|
||||
prealloc_free_structs(cache, &structs);
|
||||
if (prealloc_used)
|
||||
prealloc_free_structs(cache, &structs);
|
||||
}
|
||||
|
||||
static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
|
||||
@ -2062,7 +2067,7 @@ static void process_deferred_writethrough_bios(struct cache *cache)
|
||||
|
||||
static void writeback_some_dirty_blocks(struct cache *cache)
|
||||
{
|
||||
int r = 0;
|
||||
bool prealloc_used = false;
|
||||
dm_oblock_t oblock;
|
||||
dm_cblock_t cblock;
|
||||
struct prealloc structs;
|
||||
@ -2072,23 +2077,21 @@ static void writeback_some_dirty_blocks(struct cache *cache)
|
||||
memset(&structs, 0, sizeof(structs));
|
||||
|
||||
while (spare_migration_bandwidth(cache)) {
|
||||
if (prealloc_data_structs(cache, &structs))
|
||||
break;
|
||||
if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
|
||||
break; /* no work to do */
|
||||
|
||||
r = policy_writeback_work(cache->policy, &oblock, &cblock, busy);
|
||||
if (r)
|
||||
break;
|
||||
|
||||
r = get_cell(cache, oblock, &structs, &old_ocell);
|
||||
if (r) {
|
||||
if (prealloc_data_structs(cache, &structs) ||
|
||||
get_cell(cache, oblock, &structs, &old_ocell)) {
|
||||
policy_set_dirty(cache->policy, oblock);
|
||||
break;
|
||||
}
|
||||
|
||||
writeback(cache, &structs, oblock, cblock, old_ocell);
|
||||
prealloc_used = true;
|
||||
}
|
||||
|
||||
prealloc_free_structs(cache, &structs);
|
||||
if (prealloc_used)
|
||||
prealloc_free_structs(cache, &structs);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
@ -3496,7 +3499,7 @@ static void cache_resume(struct dm_target *ti)
|
||||
* <#demotions> <#promotions> <#dirty>
|
||||
* <#features> <features>*
|
||||
* <#core args> <core args>
|
||||
* <policy name> <#policy args> <policy args>* <cache metadata mode>
|
||||
* <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
|
||||
*/
|
||||
static void cache_status(struct dm_target *ti, status_type_t type,
|
||||
unsigned status_flags, char *result, unsigned maxlen)
|
||||
@ -3582,6 +3585,11 @@ static void cache_status(struct dm_target *ti, status_type_t type,
|
||||
else
|
||||
DMEMIT("rw ");
|
||||
|
||||
if (dm_cache_metadata_needs_check(cache->cmd))
|
||||
DMEMIT("needs_check ");
|
||||
else
|
||||
DMEMIT("- ");
|
||||
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
@ -3820,7 +3828,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
|
||||
static struct target_type cache_target = {
|
||||
.name = "cache",
|
||||
.version = {1, 7, 0},
|
||||
.version = {1, 8, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = cache_ctr,
|
||||
.dtr = cache_dtr,
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
@ -268,7 +269,7 @@ struct pool {
|
||||
process_mapping_fn process_prepared_mapping;
|
||||
process_mapping_fn process_prepared_discard;
|
||||
|
||||
struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
|
||||
struct dm_bio_prison_cell **cell_sort_array;
|
||||
};
|
||||
|
||||
static enum pool_mode get_pool_mode(struct pool *pool);
|
||||
@ -2281,18 +2282,23 @@ static void do_waker(struct work_struct *ws)
|
||||
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change_to_oods(struct pool *pool);
|
||||
|
||||
/*
|
||||
* We're holding onto IO to allow userland time to react. After the
|
||||
* timeout either the pool will have been resized (and thus back in
|
||||
* PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
|
||||
* PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
|
||||
*/
|
||||
static void do_no_space_timeout(struct work_struct *ws)
|
||||
{
|
||||
struct pool *pool = container_of(to_delayed_work(ws), struct pool,
|
||||
no_space_timeout);
|
||||
|
||||
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
|
||||
set_pool_mode(pool, PM_READ_ONLY);
|
||||
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
|
||||
pool->pf.error_if_no_space = true;
|
||||
notify_of_pool_mode_change_to_oods(pool);
|
||||
error_retry_list(pool);
|
||||
}
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
@ -2370,6 +2376,14 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
|
||||
dm_device_name(pool->pool_md), new_mode);
|
||||
}
|
||||
|
||||
static void notify_of_pool_mode_change_to_oods(struct pool *pool)
|
||||
{
|
||||
if (!pool->pf.error_if_no_space)
|
||||
notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
|
||||
else
|
||||
notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
|
||||
}
|
||||
|
||||
static bool passdown_enabled(struct pool_c *pt)
|
||||
{
|
||||
return pt->adjusted_pf.discard_passdown;
|
||||
@ -2454,7 +2468,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
||||
* frequently seeing this mode.
|
||||
*/
|
||||
if (old_mode != new_mode)
|
||||
notify_of_pool_mode_change(pool, "out-of-data-space");
|
||||
notify_of_pool_mode_change_to_oods(pool);
|
||||
pool->process_bio = process_bio_read_only;
|
||||
pool->process_discard = process_discard_bio;
|
||||
pool->process_cell = process_cell_read_only;
|
||||
@ -2777,6 +2791,7 @@ static void __pool_destroy(struct pool *pool)
|
||||
{
|
||||
__pool_table_remove(pool);
|
||||
|
||||
vfree(pool->cell_sort_array);
|
||||
if (dm_pool_metadata_close(pool->pmd) < 0)
|
||||
DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
|
||||
|
||||
@ -2889,6 +2904,13 @@ static struct pool *pool_create(struct mapped_device *pool_md,
|
||||
goto bad_mapping_pool;
|
||||
}
|
||||
|
||||
pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
|
||||
if (!pool->cell_sort_array) {
|
||||
*error = "Error allocating cell sort array";
|
||||
err_p = ERR_PTR(-ENOMEM);
|
||||
goto bad_sort_array;
|
||||
}
|
||||
|
||||
pool->ref_count = 1;
|
||||
pool->last_commit_jiffies = jiffies;
|
||||
pool->pool_md = pool_md;
|
||||
@ -2897,6 +2919,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
|
||||
|
||||
return pool;
|
||||
|
||||
bad_sort_array:
|
||||
mempool_destroy(pool->mapping_pool);
|
||||
bad_mapping_pool:
|
||||
dm_deferred_set_destroy(pool->all_io_ds);
|
||||
bad_all_io_ds:
|
||||
@ -3714,6 +3738,7 @@ static void emit_flags(struct pool_features *pf, char *result,
|
||||
* Status line is:
|
||||
* <transaction id> <used metadata sectors>/<total metadata sectors>
|
||||
* <used data sectors>/<total data sectors> <held metadata root>
|
||||
* <pool mode> <discard config> <no space config> <needs_check>
|
||||
*/
|
||||
static void pool_status(struct dm_target *ti, status_type_t type,
|
||||
unsigned status_flags, char *result, unsigned maxlen)
|
||||
@ -3815,6 +3840,11 @@ static void pool_status(struct dm_target *ti, status_type_t type,
|
||||
else
|
||||
DMEMIT("queue_if_no_space ");
|
||||
|
||||
if (dm_pool_metadata_needs_check(pool->pmd))
|
||||
DMEMIT("needs_check ");
|
||||
else
|
||||
DMEMIT("- ");
|
||||
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
@ -3918,7 +3948,7 @@ static struct target_type pool_target = {
|
||||
.name = "thin-pool",
|
||||
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
|
||||
DM_TARGET_IMMUTABLE,
|
||||
.version = {1, 15, 0},
|
||||
.version = {1, 16, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = pool_ctr,
|
||||
.dtr = pool_dtr,
|
||||
@ -4305,7 +4335,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
|
||||
static struct target_type thin_target = {
|
||||
.name = "thin",
|
||||
.version = {1, 15, 0},
|
||||
.version = {1, 16, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = thin_ctr,
|
||||
.dtr = thin_dtr,
|
||||
|
@ -1067,13 +1067,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
|
||||
*/
|
||||
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
{
|
||||
int nr_requests_pending;
|
||||
|
||||
atomic_dec(&md->pending[rw]);
|
||||
|
||||
/* nudge anyone waiting on suspend queue */
|
||||
nr_requests_pending = md_in_flight(md);
|
||||
if (!nr_requests_pending)
|
||||
if (!md_in_flight(md))
|
||||
wake_up(&md->wait);
|
||||
|
||||
/*
|
||||
@ -1085,8 +1082,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
|
||||
if (run_queue) {
|
||||
if (md->queue->mq_ops)
|
||||
blk_mq_run_hw_queues(md->queue, true);
|
||||
else if (!nr_requests_pending ||
|
||||
(nr_requests_pending >= md->queue->nr_congestion_on))
|
||||
else
|
||||
blk_run_queue_async(md->queue);
|
||||
}
|
||||
|
||||
@ -2281,8 +2277,6 @@ static void dm_init_old_md_queue(struct mapped_device *md)
|
||||
|
||||
static void cleanup_mapped_device(struct mapped_device *md)
|
||||
{
|
||||
cleanup_srcu_struct(&md->io_barrier);
|
||||
|
||||
if (md->wq)
|
||||
destroy_workqueue(md->wq);
|
||||
if (md->kworker_task)
|
||||
@ -2294,6 +2288,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
|
||||
if (md->bs)
|
||||
bioset_free(md->bs);
|
||||
|
||||
cleanup_srcu_struct(&md->io_barrier);
|
||||
|
||||
if (md->disk) {
|
||||
spin_lock(&_minor_lock);
|
||||
md->disk->private_data = NULL;
|
||||
|
@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
|
||||
|
||||
if (s < 0 && nr_center < -s) {
|
||||
/* not enough in central node */
|
||||
shift(left, center, nr_center);
|
||||
s = nr_center - target;
|
||||
shift(left, center, -nr_center);
|
||||
s += nr_center;
|
||||
shift(left, right, s);
|
||||
nr_right += s;
|
||||
} else
|
||||
@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
|
||||
if (s > 0 && nr_center < s) {
|
||||
/* not enough in central node */
|
||||
shift(center, right, nr_center);
|
||||
s = target - nr_center;
|
||||
s -= nr_center;
|
||||
shift(left, right, s);
|
||||
nr_left -= s;
|
||||
} else
|
||||
|
@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
|
||||
int r;
|
||||
struct del_stack *s;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
s = kmalloc(sizeof(*s), GFP_NOIO);
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
s->info = info;
|
||||
|
Loading…
x
Reference in New Issue
Block a user