mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 10:46:33 +00:00
efed9a3337
__blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and
passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx for
the current CPU again and uses that to get the corresponding Kyber context
in the passed hctx. However, the thread may be preempted between the two
calls to blk_mq_get_ctx(), and the ctx returned the second time may no
longer correspond to the passed hctx. This "works" accidentally most of
the time, but it can cause us to read garbage if the second ctx came from
an hctx with more ctx's than the first one (i.e., if
ctx->index_hw[hctx->type] > hctx->nr_ctx).

This manifested as this UBSAN array index out of bounds error reported by
Jakub:

UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9
index 13106 is out of range for type 'long unsigned int [128]'
Call Trace:
 dump_stack+0xa4/0xe5
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34
 queued_spin_lock_slowpath+0x476/0x480
 do_raw_spin_lock+0x1c2/0x1d0
 kyber_bio_merge+0x112/0x180
 blk_mq_submit_bio+0x1f5/0x1100
 submit_bio_noacct+0x7b0/0x870
 submit_bio+0xc2/0x3a0
 btrfs_map_bio+0x4f0/0x9d0
 btrfs_submit_data_bio+0x24e/0x310
 submit_one_bio+0x7f/0xb0
 submit_extent_page+0xc4/0x440
 __extent_writepage_io+0x2b8/0x5e0
 __extent_writepage+0x28d/0x6e0
 extent_write_cache_pages+0x4d7/0x7a0
 extent_writepages+0xa2/0x110
 do_writepages+0x8f/0x180
 __writeback_single_inode+0x99/0x7f0
 writeback_sb_inodes+0x34e/0x790
 __writeback_inodes_wb+0x9e/0x120
 wb_writeback+0x4d2/0x660
 wb_workfn+0x64d/0xa10
 process_one_work+0x53a/0xa80
 worker_thread+0x69/0x5b0
 kthread+0x20b/0x240
 ret_from_fork+0x1f/0x30

Only Kyber uses the hctx, so fix it by passing the request_queue to
->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can
map the queues itself to avoid the mismatch.

Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
180 lines
5.4 KiB
C
180 lines
5.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_ELEVATOR_H
|
|
#define _LINUX_ELEVATOR_H
|
|
|
|
#include <linux/percpu.h>
|
|
#include <linux/hashtable.h>
|
|
|
|
#ifdef CONFIG_BLOCK
|
|
|
|
/*
 * Forward declarations for types this header refers to by pointer before
 * (or without) seeing their definitions. blk_mq_debugfs_attr is only
 * referenced when CONFIG_BLK_DEBUG_FS is enabled, so it is declared under
 * the same condition.
 */
struct io_cq;
struct elevator_type;
#ifdef CONFIG_BLK_DEBUG_FS
struct blk_mq_debugfs_attr;
#endif
|
|
|
|
/*
 * Return values from elevator merger
 *
 * The numeric values are spelled out explicitly. ELEVATOR_NO_MERGE is
 * zero, so a merge result can also be tested for truth.
 */
enum elv_merge {
	ELEVATOR_NO_MERGE	= 0,
	ELEVATOR_FRONT_MERGE	= 1,
	ELEVATOR_BACK_MERGE	= 2,
	ELEVATOR_DISCARD_MERGE	= 3,
};
|
|
|
|
/*
 * Forward declarations: the callback table below only passes these types
 * by pointer, so their definitions are not required here.
 */
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;
|
|
|
|
struct elevator_mq_ops {
|
|
int (*init_sched)(struct request_queue *, struct elevator_type *);
|
|
void (*exit_sched)(struct elevator_queue *);
|
|
int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
|
|
void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
|
|
void (*depth_updated)(struct blk_mq_hw_ctx *);
|
|
|
|
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
|
|
bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
|
|
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
|
|
void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
|
|
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
|
|
void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
|
|
void (*prepare_request)(struct request *);
|
|
void (*finish_request)(struct request *);
|
|
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
|
|
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
|
|
bool (*has_work)(struct blk_mq_hw_ctx *);
|
|
void (*completed_request)(struct request *, u64);
|
|
void (*requeue_request)(struct request *);
|
|
struct request *(*former_request)(struct request_queue *, struct request *);
|
|
struct request *(*next_request)(struct request_queue *, struct request *);
|
|
void (*init_icq)(struct io_cq *);
|
|
void (*exit_icq)(struct io_cq *);
|
|
};
|
|
|
|
/*
 * Size bound used for elevator names: struct elevator_type sizes its
 * icq_cache_name buffer as ELV_NAME_MAX + 6 (the name plus "_io_cq").
 */
#define ELV_NAME_MAX (16)
|
|
|
|
struct elv_fs_entry {
|
|
struct attribute attr;
|
|
ssize_t (*show)(struct elevator_queue *, char *);
|
|
ssize_t (*store)(struct elevator_queue *, const char *, size_t);
|
|
};
|
|
|
|
/*
|
|
* identifies an elevator type, such as AS or deadline
|
|
*/
|
|
struct elevator_type
|
|
{
|
|
/* managed by elevator core */
|
|
struct kmem_cache *icq_cache;
|
|
|
|
/* fields provided by elevator implementation */
|
|
struct elevator_mq_ops ops;
|
|
|
|
size_t icq_size; /* see iocontext.h */
|
|
size_t icq_align; /* ditto */
|
|
struct elv_fs_entry *elevator_attrs;
|
|
const char *elevator_name;
|
|
const char *elevator_alias;
|
|
const unsigned int elevator_features;
|
|
struct module *elevator_owner;
|
|
#ifdef CONFIG_BLK_DEBUG_FS
|
|
const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
|
|
const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
|
|
#endif
|
|
|
|
/* managed by elevator core */
|
|
char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */
|
|
struct list_head list;
|
|
};
|
|
|
|
/*
 * Bit width handed to DECLARE_HASHTABLE() for elevator_queue::hash.
 * NOTE(review): per linux/hashtable.h this should give 1 << 6 = 64
 * buckets - confirm against that header.
 */
#define ELV_HASH_BITS 6
|
|
|
|
void elv_rqhash_del(struct request_queue *q, struct request *rq);
|
|
void elv_rqhash_add(struct request_queue *q, struct request *rq);
|
|
void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
|
|
struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
|
|
|
|
/*
|
|
* each queue has an elevator_queue associated with it
|
|
*/
|
|
struct elevator_queue
|
|
{
|
|
struct elevator_type *type;
|
|
void *elevator_data;
|
|
struct kobject kobj;
|
|
struct mutex sysfs_lock;
|
|
unsigned int registered:1;
|
|
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
|
|
};
|
|
|
|
/*
 * block elevator interface
 *
 * Declarations only; the definitions live outside this header.
 */
extern enum elv_merge elv_merge(struct request_queue *, struct request **,
		struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
		struct request *);
extern void elv_merged_request(struct request_queue *, struct request *,
		enum elv_merge);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
|
|
|
|
/*
 * io scheduler registration
 *
 * elv_register() returns an int, elv_unregister() returns nothing; both
 * take the elevator_type being registered or removed.
 * NOTE(review): presumably 0 / negative errno as usual - confirm against
 * the definition.
 */
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
|
|
|
|
/*
 * io scheduler sysfs switching
 *
 * show() receives the queue and a writable buffer; store() receives the
 * queue, a read-only buffer and its length. Both return an ssize_t.
 */
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
|
|
|
|
/* Boolean check on a (request, bio) pair; the criteria are defined elsewhere. */
extern bool elv_bio_merge_ok(struct request *, struct bio *);
/* Returns an elevator_queue for the given queue and elevator type. */
extern struct elevator_queue *elevator_alloc(struct request_queue *,
		struct elevator_type *);
|
|
|
|
/*
 * Helper functions.
 *
 * Their signatures match the former_request / next_request callbacks in
 * struct elevator_mq_ops.
 */
extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
|
|
|
|
/*
|
|
* rb support functions.
|
|
*/
|
|
extern void elv_rb_add(struct rb_root *, struct request *);
|
|
extern void elv_rb_del(struct rb_root *, struct request *);
|
|
extern struct request *elv_rb_find(struct rb_root *, sector_t);
|
|
|
|
/*
 * Insertion selection
 *
 * Distinct small integers starting at 1 (0 is not used by any of them).
 */
#define ELEVATOR_INSERT_FRONT		1
#define ELEVATOR_INSERT_BACK		2
#define ELEVATOR_INSERT_SORT		3
#define ELEVATOR_INSERT_REQUEUE		4
#define ELEVATOR_INSERT_FLUSH		5
#define ELEVATOR_INSERT_SORT_MERGE	6
|
|
|
|
/* Sum of blk_rq_pos() and blk_rq_sectors() for @rq. */
#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
/* rb_entry() applied to struct request via its 'rb_node' member. */
#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)

/* list_entry() applied to struct request via its 'queuelist' member. */
#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
/* list_del_init() on @rq's 'queuelist' member. */
#define rq_fifo_clear(rq)	list_del_init(&(rq)->queuelist)
|
|
|
|
/*
 * Elevator features.
 *
 * Single-bit flags; struct elevator_type carries an unsigned int
 * 'elevator_features' member.
 */

/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
/* Supports scheduling on multiple hardware queues */
#define ELEVATOR_F_MQ_AWARE		(1U << 1)
|
|
|
|
#endif /* CONFIG_BLOCK */
|
|
#endif
|