mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
d0e5845561
The data offset for a dax region needs to account for a reservation in
the resource range. Otherwise, device-dax is allowing mappings directly
into the memmap or device-info-block area with crash signatures like the
following:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
IP: get_zone_device_page+0x11/0x30
Call Trace:
follow_devmap_pmd+0x298/0x2c0
follow_page_mask+0x275/0x530
__get_user_pages+0xe3/0x750
__gfn_to_pfn_memslot+0x1b2/0x450 [kvm]
tdp_page_fault+0x130/0x280 [kvm]
kvm_mmu_page_fault+0x5f/0xf0 [kvm]
handle_ept_violation+0x94/0x180 [kvm_intel]
vmx_handle_exit+0x1d3/0x1440 [kvm_intel]
kvm_arch_vcpu_ioctl_run+0x81d/0x16a0 [kvm]
kvm_vcpu_ioctl+0x33c/0x620 [kvm]
do_vfs_ioctl+0xa2/0x5d0
SyS_ioctl+0x79/0x90
entry_SYSCALL_64_fastpath+0x1a/0xa4
Fixes: ab68f26221 ("/dev/dax, pmem: direct access to persistent memory")
Link: http://lkml.kernel.org/r/147205536732.1606.8994275381938837346.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Abhilash Kumar Mulumudi <m.abhilash-kumar@hpe.com>
Reported-by: Toshi Kani <toshi.kani@hpe.com>
Tested-by: Toshi Kani <toshi.kani@hpe.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
160 lines
3.9 KiB
C
160 lines
3.9 KiB
C
/*
|
|
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of version 2 of the GNU General Public License as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*/
|
|
#include <linux/percpu-refcount.h>
|
|
#include <linux/memremap.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pfn_t.h>
|
|
#include "../nvdimm/pfn.h"
|
|
#include "../nvdimm/nd.h"
|
|
#include "dax.h"
|
|
|
|
struct dax_pmem {
|
|
struct device *dev;
|
|
struct percpu_ref ref;
|
|
struct completion cmp;
|
|
};
|
|
|
|
struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
|
|
{
|
|
return container_of(ref, struct dax_pmem, ref);
|
|
}
|
|
|
|
static void dax_pmem_percpu_release(struct percpu_ref *ref)
|
|
{
|
|
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
|
|
|
|
dev_dbg(dax_pmem->dev, "%s\n", __func__);
|
|
complete(&dax_pmem->cmp);
|
|
}
|
|
|
|
static void dax_pmem_percpu_exit(void *data)
|
|
{
|
|
struct percpu_ref *ref = data;
|
|
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
|
|
|
|
dev_dbg(dax_pmem->dev, "%s\n", __func__);
|
|
percpu_ref_exit(ref);
|
|
wait_for_completion(&dax_pmem->cmp);
|
|
}
|
|
|
|
static void dax_pmem_percpu_kill(void *data)
|
|
{
|
|
struct percpu_ref *ref = data;
|
|
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
|
|
|
|
dev_dbg(dax_pmem->dev, "%s\n", __func__);
|
|
percpu_ref_kill(ref);
|
|
}
|
|
|
|
static int dax_pmem_probe(struct device *dev)
|
|
{
|
|
int rc;
|
|
void *addr;
|
|
struct resource res;
|
|
struct nd_pfn_sb *pfn_sb;
|
|
struct dax_pmem *dax_pmem;
|
|
struct nd_region *nd_region;
|
|
struct nd_namespace_io *nsio;
|
|
struct dax_region *dax_region;
|
|
struct nd_namespace_common *ndns;
|
|
struct nd_dax *nd_dax = to_nd_dax(dev);
|
|
struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
|
|
struct vmem_altmap __altmap, *altmap = NULL;
|
|
|
|
ndns = nvdimm_namespace_common_probe(dev);
|
|
if (IS_ERR(ndns))
|
|
return PTR_ERR(ndns);
|
|
nsio = to_nd_namespace_io(&ndns->dev);
|
|
|
|
/* parse the 'pfn' info block via ->rw_bytes */
|
|
devm_nsio_enable(dev, nsio);
|
|
altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
|
|
if (IS_ERR(altmap))
|
|
return PTR_ERR(altmap);
|
|
devm_nsio_disable(dev, nsio);
|
|
|
|
pfn_sb = nd_pfn->pfn_sb;
|
|
|
|
if (!devm_request_mem_region(dev, nsio->res.start,
|
|
resource_size(&nsio->res), dev_name(dev))) {
|
|
dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
|
|
return -EBUSY;
|
|
}
|
|
|
|
dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
|
|
if (!dax_pmem)
|
|
return -ENOMEM;
|
|
|
|
dax_pmem->dev = dev;
|
|
init_completion(&dax_pmem->cmp);
|
|
rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
|
|
GFP_KERNEL);
|
|
if (rc)
|
|
return rc;
|
|
|
|
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
|
|
&dax_pmem->ref);
|
|
if (rc)
|
|
return rc;
|
|
|
|
addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
|
|
if (IS_ERR(addr))
|
|
return PTR_ERR(addr);
|
|
|
|
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
|
|
&dax_pmem->ref);
|
|
if (rc)
|
|
return rc;
|
|
|
|
/* adjust the dax_region resource to the start of data */
|
|
res.start += le64_to_cpu(pfn_sb->dataoff);
|
|
|
|
nd_region = to_nd_region(dev->parent);
|
|
dax_region = alloc_dax_region(dev, nd_region->id, &res,
|
|
le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
|
|
if (!dax_region)
|
|
return -ENOMEM;
|
|
|
|
/* TODO: support for subdividing a dax region... */
|
|
rc = devm_create_dax_dev(dax_region, &res, 1);
|
|
|
|
/* child dax_dev instances now own the lifetime of the dax_region */
|
|
dax_region_put(dax_region);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static struct nd_device_driver dax_pmem_driver = {
|
|
.probe = dax_pmem_probe,
|
|
.drv = {
|
|
.name = "dax_pmem",
|
|
},
|
|
.type = ND_DRIVER_DAX_PMEM,
|
|
};
|
|
|
|
/* Module entry point: register the dax_pmem driver and return the result. */
static int __init dax_pmem_init(void)
{
	int rc = nd_driver_register(&dax_pmem_driver);

	return rc;
}
module_init(dax_pmem_init);
|
|
|
|
/* Module exit point: unregister the driver registered by dax_pmem_init(). */
static void __exit dax_pmem_exit(void)
{
	struct device_driver *drv = &dax_pmem_driver.drv;

	driver_unregister(drv);
}
module_exit(dax_pmem_exit);
|
|
|
|
/* Module metadata: author, license and the nd device alias for this driver. */
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
|