mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
dax: add .recovery_write dax_operation
Introduce the dax_recovery_write() operation. The function is used to recover a dax range that contains poison. A typical use case is a user process that receives a SIGBUS with si_code BUS_MCEERR_AR, indicating poison(s) in a dax range. In response, the user process issues a pwrite() to the page-aligned dax range, which clears the poison and puts valid data in the range. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jane Chu <jane.chu@oracle.com> Link: https://lore.kernel.org/r/20220422224508.440670-6-jane.chu@oracle.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
parent
e511c4a3d2
commit
047218ec90
@ -195,6 +195,15 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
||||||
|
|
||||||
|
/*
 * dax_recovery_write - hand a recovery write to the DAX device driver
 * @dax_dev: device whose ->recovery_write operation is invoked
 * @pgoff:   page offset, passed through to the driver unchanged
 * @addr:    destination address, passed through to the driver unchanged
 * @bytes:   number of bytes requested
 * @iter:    iov_iter supplying the data
 *
 * Returns whatever the driver's ->recovery_write returns, or 0 when the
 * driver does not provide that operation.
 */
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *iter)
{
	if (dax_dev->ops->recovery_write)
		return dax_dev->ops->recovery_write(dax_dev, pgoff, addr,
						    bytes, iter);

	/* No recovery support in this driver: nothing was written. */
	return 0;
}
EXPORT_SYMBOL_GPL(dax_recovery_write);
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_HAS_PMEM_API
|
#ifdef CONFIG_ARCH_HAS_PMEM_API
|
||||||
void arch_wb_cache_pmem(void *addr, size_t size);
|
void arch_wb_cache_pmem(void *addr, size_t size);
|
||||||
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
|
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
|
||||||
|
@ -188,9 +188,18 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
|||||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * DAX recovery write for the linear target: translate @pgoff with
 * linear_dax_pgoff() (which also yields the dax device to use) and forward
 * the request to dax_recovery_write(), returning its result.
 */
static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *lower_dax;

	/* Must run first: it may rewrite pgoff before it is forwarded. */
	lower_dax = linear_dax_pgoff(ti, &pgoff);

	return dax_recovery_write(lower_dax, pgoff, addr, bytes, i);
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define linear_dax_direct_access NULL
|
#define linear_dax_direct_access NULL
|
||||||
#define linear_dax_zero_page_range NULL
|
#define linear_dax_zero_page_range NULL
|
||||||
|
#define linear_dax_recovery_write NULL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static struct target_type linear_target = {
|
static struct target_type linear_target = {
|
||||||
@ -208,6 +217,7 @@ static struct target_type linear_target = {
|
|||||||
.iterate_devices = linear_iterate_devices,
|
.iterate_devices = linear_iterate_devices,
|
||||||
.direct_access = linear_dax_direct_access,
|
.direct_access = linear_dax_direct_access,
|
||||||
.dax_zero_page_range = linear_dax_zero_page_range,
|
.dax_zero_page_range = linear_dax_zero_page_range,
|
||||||
|
.dax_recovery_write = linear_dax_recovery_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
int __init dm_linear_init(void)
|
int __init dm_linear_init(void)
|
||||||
|
@ -905,9 +905,18 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
|||||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
|
return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t log_writes_dax_recovery_write(struct dm_target *ti,
|
||||||
|
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||||
|
{
|
||||||
|
struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
|
||||||
|
|
||||||
|
return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define log_writes_dax_direct_access NULL
|
#define log_writes_dax_direct_access NULL
|
||||||
#define log_writes_dax_zero_page_range NULL
|
#define log_writes_dax_zero_page_range NULL
|
||||||
|
#define log_writes_dax_recovery_write NULL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static struct target_type log_writes_target = {
|
static struct target_type log_writes_target = {
|
||||||
@ -925,6 +934,7 @@ static struct target_type log_writes_target = {
|
|||||||
.io_hints = log_writes_io_hints,
|
.io_hints = log_writes_io_hints,
|
||||||
.direct_access = log_writes_dax_direct_access,
|
.direct_access = log_writes_dax_direct_access,
|
||||||
.dax_zero_page_range = log_writes_dax_zero_page_range,
|
.dax_zero_page_range = log_writes_dax_zero_page_range,
|
||||||
|
.dax_recovery_write = log_writes_dax_recovery_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __init dm_log_writes_init(void)
|
static int __init dm_log_writes_init(void)
|
||||||
|
@ -331,9 +331,18 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
|
|||||||
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * DAX recovery write for the stripe target: translate @pgoff with
 * stripe_dax_pgoff() (which also yields the dax device to use) and forward
 * the request to dax_recovery_write(), returning its result.
 */
static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct dax_device *lower_dax;

	/* Must run first: it may rewrite pgoff before it is forwarded. */
	lower_dax = stripe_dax_pgoff(ti, &pgoff);

	return dax_recovery_write(lower_dax, pgoff, addr, bytes, i);
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define stripe_dax_direct_access NULL
|
#define stripe_dax_direct_access NULL
|
||||||
#define stripe_dax_zero_page_range NULL
|
#define stripe_dax_zero_page_range NULL
|
||||||
|
#define stripe_dax_recovery_write NULL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -470,6 +479,7 @@ static struct target_type stripe_target = {
|
|||||||
.io_hints = stripe_io_hints,
|
.io_hints = stripe_io_hints,
|
||||||
.direct_access = stripe_dax_direct_access,
|
.direct_access = stripe_dax_direct_access,
|
||||||
.dax_zero_page_range = stripe_dax_zero_page_range,
|
.dax_zero_page_range = stripe_dax_zero_page_range,
|
||||||
|
.dax_recovery_write = stripe_dax_recovery_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
int __init dm_stripe_init(void)
|
int __init dm_stripe_init(void)
|
||||||
|
@ -1147,6 +1147,25 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * dm_dax_recovery_write - route a DAX recovery write to the live dm target
 * @dax_dev: dm dax device; its private data is the mapped_device
 * @pgoff:   page offset, converted to a sector to look up the target
 * @addr:    destination address, passed through to the target
 * @bytes:   number of bytes requested
 * @i:       iov_iter supplying the data
 *
 * Returns the target's ->dax_recovery_write result, or 0 when there is no
 * live target for the sector or the target type has no such method.
 */
static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int srcu_idx;
	/* Same type as the return value so the byte count is never signed. */
	size_t ret = 0;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (!ti || !ti->type->dax_recovery_write)
		goto out;

	ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
out:
	/* Paired with dm_dax_get_live_target() on every path, even failure. */
	dm_put_live_table(md, srcu_idx);
	return ret;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A target may call dm_accept_partial_bio only from the map routine. It is
|
* A target may call dm_accept_partial_bio only from the map routine. It is
|
||||||
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
|
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
|
||||||
@ -3147,6 +3166,7 @@ static const struct block_device_operations dm_rq_blk_dops = {
|
|||||||
static const struct dax_operations dm_dax_ops = {
|
static const struct dax_operations dm_dax_ops = {
|
||||||
.direct_access = dm_dax_direct_access,
|
.direct_access = dm_dax_direct_access,
|
||||||
.zero_page_range = dm_dax_zero_page_range,
|
.zero_page_range = dm_dax_zero_page_range,
|
||||||
|
.recovery_write = dm_dax_recovery_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -287,9 +287,16 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
|
|||||||
return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
|
return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * ->recovery_write for the pmem driver. As written here it performs no
 * work and reports 0 bytes transferred for every request.
 */
static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	return 0;
}
|
||||||
|
|
||||||
static const struct dax_operations pmem_dax_ops = {
|
static const struct dax_operations pmem_dax_ops = {
|
||||||
.direct_access = pmem_dax_direct_access,
|
.direct_access = pmem_dax_direct_access,
|
||||||
.zero_page_range = pmem_dax_zero_page_range,
|
.zero_page_range = pmem_dax_zero_page_range,
|
||||||
|
.recovery_write = pmem_recovery_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
static ssize_t write_cache_show(struct device *dev,
|
static ssize_t write_cache_show(struct device *dev,
|
||||||
|
13
fs/dax.c
13
fs/dax.c
@ -1240,6 +1240,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||||||
const size_t size = ALIGN(length + offset, PAGE_SIZE);
|
const size_t size = ALIGN(length + offset, PAGE_SIZE);
|
||||||
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
|
||||||
ssize_t map_len;
|
ssize_t map_len;
|
||||||
|
bool recovery = false;
|
||||||
void *kaddr;
|
void *kaddr;
|
||||||
|
|
||||||
if (fatal_signal_pending(current)) {
|
if (fatal_signal_pending(current)) {
|
||||||
@ -1249,6 +1250,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||||||
|
|
||||||
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
|
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
|
||||||
DAX_ACCESS, &kaddr, NULL);
|
DAX_ACCESS, &kaddr, NULL);
|
||||||
|
if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
|
||||||
|
map_len = dax_direct_access(dax_dev, pgoff,
|
||||||
|
PHYS_PFN(size), DAX_RECOVERY_WRITE,
|
||||||
|
&kaddr, NULL);
|
||||||
|
if (map_len > 0)
|
||||||
|
recovery = true;
|
||||||
|
}
|
||||||
if (map_len < 0) {
|
if (map_len < 0) {
|
||||||
ret = map_len;
|
ret = map_len;
|
||||||
break;
|
break;
|
||||||
@ -1260,7 +1268,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
|
|||||||
if (map_len > end - pos)
|
if (map_len > end - pos)
|
||||||
map_len = end - pos;
|
map_len = end - pos;
|
||||||
|
|
||||||
if (iov_iter_rw(iter) == WRITE)
|
if (recovery)
|
||||||
|
xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
|
||||||
|
map_len, iter);
|
||||||
|
else if (iov_iter_rw(iter) == WRITE)
|
||||||
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
|
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
|
||||||
map_len, iter);
|
map_len, iter);
|
||||||
else
|
else
|
||||||
|
@ -35,6 +35,12 @@ struct dax_operations {
|
|||||||
sector_t, sector_t);
|
sector_t, sector_t);
|
||||||
/* zero_page_range: required operation. Zero page range */
|
/* zero_page_range: required operation. Zero page range */
|
||||||
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
|
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
|
||||||
|
/*
|
||||||
|
* recovery_write: recover a poisoned range by DAX device driver
|
||||||
|
* capable of clearing poison.
|
||||||
|
*/
|
||||||
|
size_t (*recovery_write)(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||||
|
void *addr, size_t bytes, struct iov_iter *iter);
|
||||||
};
|
};
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_DAX)
|
#if IS_ENABLED(CONFIG_DAX)
|
||||||
@ -45,6 +51,8 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc);
|
|||||||
bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
||||||
bool dax_synchronous(struct dax_device *dax_dev);
|
bool dax_synchronous(struct dax_device *dax_dev);
|
||||||
void set_dax_synchronous(struct dax_device *dax_dev);
|
void set_dax_synchronous(struct dax_device *dax_dev);
|
||||||
|
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||||
|
void *addr, size_t bytes, struct iov_iter *i);
|
||||||
/*
|
/*
|
||||||
* Check if given mapping is supported by the file / underlying device.
|
* Check if given mapping is supported by the file / underlying device.
|
||||||
*/
|
*/
|
||||||
@ -92,6 +100,11 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
|||||||
{
|
{
|
||||||
return !(vma->vm_flags & VM_SYNC);
|
return !(vma->vm_flags & VM_SYNC);
|
||||||
}
|
}
|
||||||
|
static inline size_t dax_recovery_write(struct dax_device *dax_dev,
|
||||||
|
pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void set_dax_nocache(struct dax_device *dax_dev);
|
void set_dax_nocache(struct dax_device *dax_dev);
|
||||||
|
@ -152,6 +152,14 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
|
|||||||
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
|
typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||||
size_t nr_pages);
|
size_t nr_pages);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns:
|
||||||
|
* != 0 : number of bytes transferred
|
||||||
|
* 0 : recovery write failed
|
||||||
|
*/
|
||||||
|
typedef size_t (*dm_dax_recovery_write_fn)(struct dm_target *ti, pgoff_t pgoff,
|
||||||
|
void *addr, size_t bytes, struct iov_iter *i);
|
||||||
|
|
||||||
void dm_error(const char *message);
|
void dm_error(const char *message);
|
||||||
|
|
||||||
struct dm_dev {
|
struct dm_dev {
|
||||||
@ -201,6 +209,7 @@ struct target_type {
|
|||||||
dm_io_hints_fn io_hints;
|
dm_io_hints_fn io_hints;
|
||||||
dm_dax_direct_access_fn direct_access;
|
dm_dax_direct_access_fn direct_access;
|
||||||
dm_dax_zero_page_range_fn dax_zero_page_range;
|
dm_dax_zero_page_range_fn dax_zero_page_range;
|
||||||
|
dm_dax_recovery_write_fn dax_recovery_write;
|
||||||
|
|
||||||
/* For internal device-mapper use. */
|
/* For internal device-mapper use. */
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
|
Loading…
Reference in New Issue
Block a user