mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
libnvdimm for 4.15
* Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable 'userspace flush' of persistent memory updates via filesystem-dax mappings. It arranges for any filesystem metadata updates that may be required to satisfy a write fault to also be flushed ("on disk") before the kernel returns to userspace from the fault handler. Effectively every write-fault that dirties metadata completes an fsync() before returning from the fault handler. The new MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag is validated as supported by the filesystem's ->mmap() file operation. * Add support for the standard ACPI 6.2 label access methods that replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods. This enables interoperability with environments that only implement the standardized methods. * Add support for the ACPI 6.2 NVDIMM media error injection methods. * Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for latch last shutdown status, firmware update, SMART error injection, and SMART alarm threshold control. * Cleanup physical address information disclosures to be root-only. * Fix revalidation of the DIMM "locked label area" status to support dynamic unlock of the label area. * Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA (system-physical-address) command and error injection commands. Acknowledgements that came after the commits were pushed to -next:957ac8c421
dax: fix PMD faults on zero-length files Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> a39e596baa
xfs: support for synchronous DAX faults Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> 7b565c9f96
xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault() Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJaDfvcAAoJEB7SkWpmfYgCk7sP/2qJhBH+VTTdg2osDnhAdAhI co/AGEmsHFlUCMBb/Ek7UnMAmhBYiJU2q4ywPsNFBpusXpMlqNy5Iwo7k4/wQHE/ SJcIM0g4zg0ViFuUhwV+C2T0R5UzFR8JLd9EYWj/YS6aJpurtotm5l4UStaM0Hzo AhxSXJLrBDuqCpbOxbctfiGEmdRL7aRfBEAARTNRKBn/iXxJUcYHlp62rtXQS+t4 I6LC/URCWTNTTMGmzW6TRsgSD9WMfd19xKcGzN3qL6ee0KFccxN4ctFqHA/sFGOh iYLeR0XJUjJxyp+PkWGteXPVZL0Kj3bD/lSTG+Co5bm/ra8a/sh3TSFfgFyoBZD1 EqMN8Ryf80hGp3FabeH2Iw2SviYPZpHSWgjddjxLD0RA6OmpzINc+Wm8eqApjMME sbZDTOijiab4QMQ0XamF4GuDHyQtawv5Y/w2Ehhl1tmiqW+5tKhsKqxkQt+/V3Yt RTVSRe2Pkway66b+cD64IdQ6L2tyonPnmi5IzgkKOhlOEGomy+4/U2Jt2bMbhzq6 ymszKmXp2XI8P06wU8sHrIUeXO5I9qoKn/fZA73Eb8aIzgJe3tBE/5+Ab7RG6HB9 1OVfcMWoXU1gNgNktTs63X1Lsg4aW9kt/K4fPHHcqUcaliEJpJTlAbg9GLF2buoW nQ+0fTRgMRihE3ZA0Fs3 =h2vZ -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm and dax updates from Dan Williams: "Save for a few late fixes, all of these commits have shipped in -next releases since before the merge window opened, and 0day has given a build success notification. The ext4 touches came from Jan, and the xfs touches have Darrick's reviewed-by. An xfstest for the MAP_SYNC feature has been through a few round of reviews and is on track to be merged. - Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable 'userspace flush' of persistent memory updates via filesystem-dax mappings. It arranges for any filesystem metadata updates that may be required to satisfy a write fault to also be flushed ("on disk") before the kernel returns to userspace from the fault handler. Effectively every write-fault that dirties metadata completes an fsync() before returning from the fault handler. 
The new MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag is validated as supported by the filesystem's ->mmap() file operation. - Add support for the standard ACPI 6.2 label access methods that replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods. This enables interoperability with environments that only implement the standardized methods. - Add support for the ACPI 6.2 NVDIMM media error injection methods. - Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for latch last shutdown status, firmware update, SMART error injection, and SMART alarm threshold control. - Cleanup physical address information disclosures to be root-only. - Fix revalidation of the DIMM "locked label area" status to support dynamic unlock of the label area. - Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA (system-physical-address) command and error injection commands. Acknowledgements that came after the commits were pushed to -next: -957ac8c421
("dax: fix PMD faults on zero-length files"): Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> - a39e596baa
("xfs: support for synchronous DAX faults") and 7b565c9f96
("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()") Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>" * tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits) acpi, nfit: add 'Enable Latch System Shutdown Status' command support dax: fix general protection fault in dax_alloc_inode dax: fix PMD faults on zero-length files dax: stop requiring a live device for dax_flush() brd: remove dax support dax: quiet bdev_dax_supported() fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core tools/testing/nvdimm: unit test clear-error commands acpi, nfit: validate commands against the device type tools/testing/nvdimm: stricter bounds checking for error injection commands xfs: support for synchronous DAX faults xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault() ext4: Support for synchronous DAX faults ext4: Simplify error handling in ext4_dax_huge_fault() dax: Implement dax_finish_sync_fault() dax, iomap: Add support for synchronous faults mm: Define MAP_SYNC and VM_SYNC flags dax: Allow tuning whether dax_insert_mapping_entry() dirties entry dax: Allow dax_iomap_fault() to return pfn dax: Fix comment describing dax_iomap_fault() ...
This commit is contained in:
commit
a3841f94c7
@ -4208,7 +4208,7 @@ L: linux-i2c@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/i2c/busses/i2c-diolan-u2c.c
|
||||
|
||||
DIRECT ACCESS (DAX)
|
||||
FILESYSTEM DIRECT ACCESS (DAX)
|
||||
M: Matthew Wilcox <mawilcox@microsoft.com>
|
||||
M: Ross Zwisler <ross.zwisler@linux.intel.com>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
@ -4217,6 +4217,12 @@ F: fs/dax.c
|
||||
F: include/linux/dax.h
|
||||
F: include/trace/events/fs_dax.h
|
||||
|
||||
DEVICE DIRECT ACCESS (DAX)
|
||||
M: Dan Williams <dan.j.williams@intel.com>
|
||||
L: linux-nvdimm@lists.01.org
|
||||
S: Supported
|
||||
F: drivers/dax/
|
||||
|
||||
DIRECTORY NOTIFICATION (DNOTIFY)
|
||||
M: Jan Kara <jack@suse.cz>
|
||||
R: Amir Goldstein <amir73il@gmail.com>
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#define MAP_SHARED 0x01 /* Share changes */
|
||||
#define MAP_PRIVATE 0x02 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */
|
||||
#define MAP_FIXED 0x100 /* Interpret addr exactly */
|
||||
#define MAP_ANONYMOUS 0x10 /* don't use a file */
|
||||
|
@ -29,6 +29,7 @@
|
||||
*/
|
||||
#define MAP_SHARED 0x001 /* Share changes */
|
||||
#define MAP_PRIVATE 0x002 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x00f /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x010 /* Interpret addr exactly */
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#define MAP_SHARED 0x01 /* Share changes */
|
||||
#define MAP_PRIVATE 0x02 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x03 /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x04 /* Interpret addr exactly */
|
||||
#define MAP_ANONYMOUS 0x10 /* don't use a file */
|
||||
|
@ -36,6 +36,7 @@
|
||||
*/
|
||||
#define MAP_SHARED 0x001 /* Share changes */
|
||||
#define MAP_PRIVATE 0x002 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x00f /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x010 /* Interpret addr exactly */
|
||||
|
||||
|
@ -183,13 +183,33 @@ static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xlat_nvdimm_status(void *buf, unsigned int cmd, u32 status)
|
||||
#define ACPI_LABELS_LOCKED 3
|
||||
|
||||
static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
|
||||
u32 status)
|
||||
{
|
||||
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
|
||||
|
||||
switch (cmd) {
|
||||
case ND_CMD_GET_CONFIG_SIZE:
|
||||
/*
|
||||
* In the _LSI, _LSR, _LSW case the locked status is
|
||||
* communicated via the read/write commands
|
||||
*/
|
||||
if (nfit_mem->has_lsi)
|
||||
break;
|
||||
|
||||
if (status >> 16 & ND_CONFIG_LOCKED)
|
||||
return -EACCES;
|
||||
break;
|
||||
case ND_CMD_GET_CONFIG_DATA:
|
||||
if (nfit_mem->has_lsr && status == ACPI_LABELS_LOCKED)
|
||||
return -EACCES;
|
||||
break;
|
||||
case ND_CMD_SET_CONFIG_DATA:
|
||||
if (nfit_mem->has_lsw && status == ACPI_LABELS_LOCKED)
|
||||
return -EACCES;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -205,13 +225,182 @@ static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
|
||||
{
|
||||
if (!nvdimm)
|
||||
return xlat_bus_status(buf, cmd, status);
|
||||
return xlat_nvdimm_status(buf, cmd, status);
|
||||
return xlat_nvdimm_status(nvdimm, buf, cmd, status);
|
||||
}
|
||||
|
||||
/* convert _LS{I,R} packages to the buffer object acpi_nfit_ctl expects */
|
||||
static union acpi_object *pkg_to_buf(union acpi_object *pkg)
|
||||
{
|
||||
int i;
|
||||
void *dst;
|
||||
size_t size = 0;
|
||||
union acpi_object *buf = NULL;
|
||||
|
||||
if (pkg->type != ACPI_TYPE_PACKAGE) {
|
||||
WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
|
||||
pkg->type);
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i < pkg->package.count; i++) {
|
||||
union acpi_object *obj = &pkg->package.elements[i];
|
||||
|
||||
if (obj->type == ACPI_TYPE_INTEGER)
|
||||
size += 4;
|
||||
else if (obj->type == ACPI_TYPE_BUFFER)
|
||||
size += obj->buffer.length;
|
||||
else {
|
||||
WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
|
||||
obj->type);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
buf = ACPI_ALLOCATE(sizeof(*buf) + size);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
dst = buf + 1;
|
||||
buf->type = ACPI_TYPE_BUFFER;
|
||||
buf->buffer.length = size;
|
||||
buf->buffer.pointer = dst;
|
||||
for (i = 0; i < pkg->package.count; i++) {
|
||||
union acpi_object *obj = &pkg->package.elements[i];
|
||||
|
||||
if (obj->type == ACPI_TYPE_INTEGER) {
|
||||
memcpy(dst, &obj->integer.value, 4);
|
||||
dst += 4;
|
||||
} else if (obj->type == ACPI_TYPE_BUFFER) {
|
||||
memcpy(dst, obj->buffer.pointer, obj->buffer.length);
|
||||
dst += obj->buffer.length;
|
||||
}
|
||||
}
|
||||
err:
|
||||
ACPI_FREE(pkg);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static union acpi_object *int_to_buf(union acpi_object *integer)
|
||||
{
|
||||
union acpi_object *buf = ACPI_ALLOCATE(sizeof(*buf) + 4);
|
||||
void *dst = NULL;
|
||||
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
if (integer->type != ACPI_TYPE_INTEGER) {
|
||||
WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
|
||||
integer->type);
|
||||
goto err;
|
||||
}
|
||||
|
||||
dst = buf + 1;
|
||||
buf->type = ACPI_TYPE_BUFFER;
|
||||
buf->buffer.length = 4;
|
||||
buf->buffer.pointer = dst;
|
||||
memcpy(dst, &integer->integer.value, 4);
|
||||
err:
|
||||
ACPI_FREE(integer);
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
 * Evaluate the _LSW method on @handle.
 *
 * The method is passed three arguments: @offset and @len as integers,
 * followed by a buffer of @len bytes at @data. Returns the method's
 * result converted by int_to_buf(), or NULL when the evaluation fails.
 */
static union acpi_object *acpi_label_write(acpi_handle handle, u32 offset,
		u32 len, void *data)
{
	union acpi_object args[3] = {
		[0] = {
			.integer = {
				.type = ACPI_TYPE_INTEGER,
				.value = offset,
			},
		},
		[1] = {
			.integer = {
				.type = ACPI_TYPE_INTEGER,
				.value = len,
			},
		},
		[2] = {
			.buffer = {
				.type = ACPI_TYPE_BUFFER,
				.pointer = data,
				.length = len,
			},
		},
	};
	struct acpi_object_list input = {
		.count = 3,
		.pointer = args,
	};
	/* let ACPICA allocate the result; int_to_buf() releases it */
	struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
	acpi_status status;

	status = acpi_evaluate_object(handle, "_LSW", &input, &result);
	return ACPI_FAILURE(status) ? NULL : int_to_buf(result.pointer);
}
|
||||
|
||||
/*
 * Evaluate the _LSR method on @handle.
 *
 * The method is passed two integer arguments, @offset and @len. Returns
 * the method's result package flattened by pkg_to_buf(), or NULL when
 * the evaluation fails.
 */
static union acpi_object *acpi_label_read(acpi_handle handle, u32 offset,
		u32 len)
{
	union acpi_object args[2] = {
		[0] = {
			.integer = {
				.type = ACPI_TYPE_INTEGER,
				.value = offset,
			},
		},
		[1] = {
			.integer = {
				.type = ACPI_TYPE_INTEGER,
				.value = len,
			},
		},
	};
	struct acpi_object_list input = {
		.count = 2,
		.pointer = args,
	};
	/* let ACPICA allocate the result; pkg_to_buf() releases it */
	struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
	acpi_status status;

	status = acpi_evaluate_object(handle, "_LSR", &input, &result);
	return ACPI_FAILURE(status) ? NULL : pkg_to_buf(result.pointer);
}
|
||||
|
||||
/*
 * Evaluate the argument-less _LSI method on @handle.
 *
 * Returns the method's result package flattened by pkg_to_buf(), or NULL
 * when the evaluation fails.
 */
static union acpi_object *acpi_label_info(acpi_handle handle)
{
	/* let ACPICA allocate the result; pkg_to_buf() releases it */
	struct acpi_buffer result = { ACPI_ALLOCATE_BUFFER, NULL };
	acpi_status status;

	status = acpi_evaluate_object(handle, "_LSI", NULL, &result);
	return ACPI_FAILURE(status) ? NULL : pkg_to_buf(result.pointer);
}
|
||||
|
||||
static u8 nfit_dsm_revid(unsigned family, unsigned func)
|
||||
{
|
||||
static const u8 revid_table[NVDIMM_FAMILY_MAX+1][32] = {
|
||||
[NVDIMM_FAMILY_INTEL] = {
|
||||
[NVDIMM_INTEL_GET_MODES] = 2,
|
||||
[NVDIMM_INTEL_GET_FWINFO] = 2,
|
||||
[NVDIMM_INTEL_START_FWUPDATE] = 2,
|
||||
[NVDIMM_INTEL_SEND_FWUPDATE] = 2,
|
||||
[NVDIMM_INTEL_FINISH_FWUPDATE] = 2,
|
||||
[NVDIMM_INTEL_QUERY_FWUPDATE] = 2,
|
||||
[NVDIMM_INTEL_SET_THRESHOLD] = 2,
|
||||
[NVDIMM_INTEL_INJECT_ERROR] = 2,
|
||||
},
|
||||
};
|
||||
u8 id;
|
||||
|
||||
if (family > NVDIMM_FAMILY_MAX)
|
||||
return 0;
|
||||
if (func > 31)
|
||||
return 0;
|
||||
id = revid_table[family][func];
|
||||
if (id == 0)
|
||||
return 1; /* default */
|
||||
return id;
|
||||
}
|
||||
|
||||
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
|
||||
{
|
||||
struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
|
||||
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
|
||||
union acpi_object in_obj, in_buf, *out_obj;
|
||||
const struct nd_cmd_desc *desc = NULL;
|
||||
struct device *dev = acpi_desc->dev;
|
||||
@ -235,7 +424,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
}
|
||||
|
||||
if (nvdimm) {
|
||||
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
|
||||
struct acpi_device *adev = nfit_mem->adev;
|
||||
|
||||
if (!adev)
|
||||
@ -294,7 +482,29 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
in_buf.buffer.pointer,
|
||||
min_t(u32, 256, in_buf.buffer.length), true);
|
||||
|
||||
out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
|
||||
/* call the BIOS, prefer the named methods over _DSM if available */
|
||||
if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
|
||||
out_obj = acpi_label_info(handle);
|
||||
else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
|
||||
struct nd_cmd_get_config_data_hdr *p = buf;
|
||||
|
||||
out_obj = acpi_label_read(handle, p->in_offset, p->in_length);
|
||||
} else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA
|
||||
&& nfit_mem->has_lsw) {
|
||||
struct nd_cmd_set_config_hdr *p = buf;
|
||||
|
||||
out_obj = acpi_label_write(handle, p->in_offset, p->in_length,
|
||||
p->in_buf);
|
||||
} else {
|
||||
u8 revid;
|
||||
|
||||
if (nvdimm)
|
||||
revid = nfit_dsm_revid(nfit_mem->family, func);
|
||||
else
|
||||
revid = 1;
|
||||
out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
|
||||
}
|
||||
|
||||
if (!out_obj) {
|
||||
dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
|
||||
cmd_name);
|
||||
@ -356,8 +566,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
* Set fw_status for all the commands with a known format to be
|
||||
* later interpreted by xlat_status().
|
||||
*/
|
||||
if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR)
|
||||
|| (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR)))
|
||||
if (i >= 1 && ((!nvdimm && cmd >= ND_CMD_ARS_CAP
|
||||
&& cmd <= ND_CMD_CLEAR_ERROR)
|
||||
|| (nvdimm && cmd >= ND_CMD_SMART
|
||||
&& cmd <= ND_CMD_VENDOR)))
|
||||
fw_status = *(u32 *) out_obj->buffer.pointer;
|
||||
|
||||
if (offset + in_buf.buffer.length < buf_len) {
|
||||
@ -1431,6 +1643,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
{
|
||||
struct acpi_device *adev, *adev_dimm;
|
||||
struct device *dev = acpi_desc->dev;
|
||||
union acpi_object *obj;
|
||||
unsigned long dsm_mask;
|
||||
const guid_t *guid;
|
||||
int i;
|
||||
@ -1463,7 +1676,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
* different command sets. Note, that checking for function0 (bit0)
|
||||
* tells us if any commands are reachable through this GUID.
|
||||
*/
|
||||
for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
|
||||
for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
|
||||
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
|
||||
if (family < 0 || i == default_dsm_family)
|
||||
family = i;
|
||||
@ -1473,7 +1686,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
if (override_dsm_mask && !disable_vendor_specific)
|
||||
dsm_mask = override_dsm_mask;
|
||||
else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
|
||||
dsm_mask = 0x3fe;
|
||||
dsm_mask = NVDIMM_INTEL_CMDMASK;
|
||||
if (disable_vendor_specific)
|
||||
dsm_mask &= ~(1 << ND_CMD_VENDOR);
|
||||
} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
|
||||
@ -1493,9 +1706,32 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
|
||||
guid = to_nfit_uuid(nfit_mem->family);
|
||||
for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
|
||||
if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
|
||||
if (acpi_check_dsm(adev_dimm->handle, guid,
|
||||
nfit_dsm_revid(nfit_mem->family, i),
|
||||
1ULL << i))
|
||||
set_bit(i, &nfit_mem->dsm_mask);
|
||||
|
||||
obj = acpi_label_info(adev_dimm->handle);
|
||||
if (obj) {
|
||||
ACPI_FREE(obj);
|
||||
nfit_mem->has_lsi = 1;
|
||||
dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
|
||||
}
|
||||
|
||||
obj = acpi_label_read(adev_dimm->handle, 0, 0);
|
||||
if (obj) {
|
||||
ACPI_FREE(obj);
|
||||
nfit_mem->has_lsr = 1;
|
||||
dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
|
||||
}
|
||||
|
||||
obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
|
||||
if (obj) {
|
||||
ACPI_FREE(obj);
|
||||
nfit_mem->has_lsw = 1;
|
||||
dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1571,8 +1807,21 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
|
||||
* userspace interface.
|
||||
*/
|
||||
cmd_mask = 1UL << ND_CMD_CALL;
|
||||
if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
|
||||
cmd_mask |= nfit_mem->dsm_mask;
|
||||
if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
|
||||
/*
|
||||
* These commands have a 1:1 correspondence
|
||||
* between DSM payload and libnvdimm ioctl
|
||||
* payload format.
|
||||
*/
|
||||
cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
|
||||
}
|
||||
|
||||
if (nfit_mem->has_lsi)
|
||||
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
|
||||
if (nfit_mem->has_lsr)
|
||||
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
|
||||
if (nfit_mem->has_lsw)
|
||||
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
|
||||
|
||||
flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
|
||||
: NULL;
|
||||
@ -1645,6 +1894,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
|
||||
int i;
|
||||
|
||||
nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
|
||||
nd_desc->bus_dsm_mask = acpi_desc->bus_nfit_cmd_force_en;
|
||||
adev = to_acpi_dev(acpi_desc);
|
||||
if (!adev)
|
||||
return;
|
||||
@ -2239,7 +2489,7 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
|
||||
if (ars_status->out_length
|
||||
< 44 + sizeof(struct nd_ars_record) * (i + 1))
|
||||
break;
|
||||
rc = nvdimm_bus_add_poison(nvdimm_bus,
|
||||
rc = nvdimm_bus_add_badrange(nvdimm_bus,
|
||||
ars_status->records[i].err_address,
|
||||
ars_status->records[i].length);
|
||||
if (rc)
|
||||
|
@ -67,7 +67,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
|
||||
continue;
|
||||
|
||||
/* If this fails due to an -ENOMEM, there is little we can do */
|
||||
nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
|
||||
nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
|
||||
ALIGN(mce->addr, L1_CACHE_BYTES),
|
||||
L1_CACHE_BYTES);
|
||||
nvdimm_region_notify(nfit_spa->nd_region,
|
||||
|
@ -24,7 +24,7 @@
|
||||
/* ACPI 6.1 */
|
||||
#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
|
||||
|
||||
/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */
|
||||
/* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */
|
||||
#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
|
||||
|
||||
/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
|
||||
@ -38,6 +38,37 @@
|
||||
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
|
||||
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
|
||||
|
||||
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
|
||||
|
||||
#define NVDIMM_STANDARD_CMDMASK \
|
||||
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
|
||||
| 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA \
|
||||
| 1 << ND_CMD_SET_CONFIG_DATA | 1 << ND_CMD_VENDOR_EFFECT_LOG_SIZE \
|
||||
| 1 << ND_CMD_VENDOR_EFFECT_LOG | 1 << ND_CMD_VENDOR)
|
||||
|
||||
/*
|
||||
* Command numbers that the kernel needs to know about to handle
|
||||
* non-default DSM revision ids
|
||||
*/
|
||||
enum nvdimm_family_cmds {
|
||||
NVDIMM_INTEL_LATCH_SHUTDOWN = 10,
|
||||
NVDIMM_INTEL_GET_MODES = 11,
|
||||
NVDIMM_INTEL_GET_FWINFO = 12,
|
||||
NVDIMM_INTEL_START_FWUPDATE = 13,
|
||||
NVDIMM_INTEL_SEND_FWUPDATE = 14,
|
||||
NVDIMM_INTEL_FINISH_FWUPDATE = 15,
|
||||
NVDIMM_INTEL_QUERY_FWUPDATE = 16,
|
||||
NVDIMM_INTEL_SET_THRESHOLD = 17,
|
||||
NVDIMM_INTEL_INJECT_ERROR = 18,
|
||||
};
|
||||
|
||||
#define NVDIMM_INTEL_CMDMASK \
|
||||
(NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \
|
||||
| 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \
|
||||
| 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \
|
||||
| 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \
|
||||
| 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN)
|
||||
|
||||
enum nfit_uuids {
|
||||
/* for simplicity alias the uuid index with the family id */
|
||||
NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
|
||||
@ -140,6 +171,9 @@ struct nfit_mem {
|
||||
struct resource *flush_wpq;
|
||||
unsigned long dsm_mask;
|
||||
int family;
|
||||
u32 has_lsi:1;
|
||||
u32 has_lsr:1;
|
||||
u32 has_lsw:1;
|
||||
};
|
||||
|
||||
struct acpi_nfit_desc {
|
||||
@ -167,6 +201,7 @@ struct acpi_nfit_desc {
|
||||
unsigned int init_complete:1;
|
||||
unsigned long dimm_cmd_force_en;
|
||||
unsigned long bus_cmd_force_en;
|
||||
unsigned long bus_nfit_cmd_force_en;
|
||||
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
|
||||
void *iobuf, u64 len, int rw);
|
||||
};
|
||||
|
@ -302,7 +302,6 @@ config BLK_DEV_SX8
|
||||
|
||||
config BLK_DEV_RAM
|
||||
tristate "RAM block device support"
|
||||
select DAX if BLK_DEV_RAM_DAX
|
||||
---help---
|
||||
Saying Y here will allow you to use a portion of your RAM memory as
|
||||
a block device, so that you can make file systems on it, read and
|
||||
@ -338,17 +337,6 @@ config BLK_DEV_RAM_SIZE
|
||||
The default value is 4096 kilobytes. Only change this if you know
|
||||
what you are doing.
|
||||
|
||||
config BLK_DEV_RAM_DAX
|
||||
bool "Support Direct Access (DAX) to RAM block devices"
|
||||
depends on BLK_DEV_RAM && FS_DAX
|
||||
default n
|
||||
help
|
||||
Support filesystems using DAX to access RAM block devices. This
|
||||
avoids double-buffering data in the page cache before copying it
|
||||
to the block device. Answering Y will slightly enlarge the kernel,
|
||||
and will prevent RAM block device backing store memory from being
|
||||
allocated from highmem (only a problem for highmem systems).
|
||||
|
||||
config CDROM_PKTCDVD
|
||||
tristate "Packet writing on CD/DVD media (DEPRECATED)"
|
||||
depends on !UML
|
||||
|
@ -21,11 +21,6 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/uio.h>
|
||||
#endif
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@ -45,9 +40,6 @@ struct brd_device {
|
||||
|
||||
struct request_queue *brd_queue;
|
||||
struct gendisk *brd_disk;
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
struct dax_device *dax_dev;
|
||||
#endif
|
||||
struct list_head brd_list;
|
||||
|
||||
/*
|
||||
@ -112,9 +104,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
|
||||
* restriction might be able to be lifted.
|
||||
*/
|
||||
gfp_flags = GFP_NOIO | __GFP_ZERO;
|
||||
#ifndef CONFIG_BLK_DEV_RAM_DAX
|
||||
gfp_flags |= __GFP_HIGHMEM;
|
||||
#endif
|
||||
page = alloc_page(gfp_flags);
|
||||
if (!page)
|
||||
return NULL;
|
||||
@ -334,43 +323,6 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
if (!brd)
|
||||
return -ENODEV;
|
||||
page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
|
||||
if (!page)
|
||||
return -ENOSPC;
|
||||
*kaddr = page_address(page);
|
||||
*pfn = page_to_pfn_t(page);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static long brd_dax_direct_access(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct brd_device *brd = dax_get_private(dax_dev);
|
||||
|
||||
return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
|
||||
void *addr, size_t bytes, struct iov_iter *i)
|
||||
{
|
||||
return copy_from_iter(addr, bytes, i);
|
||||
}
|
||||
|
||||
static const struct dax_operations brd_dax_ops = {
|
||||
.direct_access = brd_dax_direct_access,
|
||||
.copy_from_iter = brd_dax_copy_from_iter,
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct block_device_operations brd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.rw_page = brd_rw_page,
|
||||
@ -451,21 +403,8 @@ static struct brd_device *brd_alloc(int i)
|
||||
set_capacity(disk, rd_size * 2);
|
||||
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
|
||||
brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
|
||||
if (!brd->dax_dev)
|
||||
goto out_free_inode;
|
||||
#endif
|
||||
|
||||
|
||||
return brd;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
out_free_inode:
|
||||
kill_dax(brd->dax_dev);
|
||||
put_dax(brd->dax_dev);
|
||||
#endif
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(brd->brd_queue);
|
||||
out_free_dev:
|
||||
@ -505,10 +444,6 @@ static struct brd_device *brd_init_one(int i, bool *new)
|
||||
static void brd_del_one(struct brd_device *brd)
|
||||
{
|
||||
list_del(&brd->brd_list);
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
kill_dax(brd->dax_dev);
|
||||
put_dax(brd->dax_dev);
|
||||
#endif
|
||||
del_gendisk(brd->brd_disk);
|
||||
brd_free(brd);
|
||||
}
|
||||
|
@ -222,7 +222,8 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
|
||||
unsigned long size)
|
||||
{
|
||||
struct resource *res;
|
||||
phys_addr_t phys;
|
||||
/* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */
|
||||
phys_addr_t uninitialized_var(phys);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dev_dax->num_resources; i++) {
|
||||
|
@ -92,21 +92,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
|
||||
long len;
|
||||
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
pr_err("VFS (%s): error: unsupported blocksize for dax\n",
|
||||
pr_debug("VFS (%s): error: unsupported blocksize for dax\n",
|
||||
sb->s_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
|
||||
if (err) {
|
||||
pr_err("VFS (%s): error: unaligned partition for dax\n",
|
||||
pr_debug("VFS (%s): error: unaligned partition for dax\n",
|
||||
sb->s_id);
|
||||
return err;
|
||||
}
|
||||
|
||||
dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
|
||||
if (!dax_dev) {
|
||||
pr_err("VFS (%s): error: device does not support dax\n",
|
||||
pr_debug("VFS (%s): error: device does not support dax\n",
|
||||
sb->s_id);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
@ -118,7 +118,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
|
||||
put_dax(dax_dev);
|
||||
|
||||
if (len < 1) {
|
||||
pr_err("VFS (%s): error: dax access failed (%ld)",
|
||||
pr_debug("VFS (%s): error: dax access failed (%ld)\n",
|
||||
sb->s_id, len);
|
||||
return len < 0 ? len : -EIO;
|
||||
}
|
||||
@ -273,9 +273,6 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
|
||||
void arch_wb_cache_pmem(void *addr, size_t size);
|
||||
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
|
||||
{
|
||||
if (unlikely(!dax_alive(dax_dev)))
|
||||
return;
|
||||
|
||||
if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
|
||||
return;
|
||||
|
||||
@ -344,6 +341,9 @@ static struct inode *dax_alloc_inode(struct super_block *sb)
|
||||
struct inode *inode;
|
||||
|
||||
dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
|
||||
if (!dax_dev)
|
||||
return NULL;
|
||||
|
||||
inode = &dax_dev->inode;
|
||||
inode->i_rdev = 0;
|
||||
return inode;
|
||||
|
@ -21,6 +21,7 @@ libnvdimm-y += region_devs.o
|
||||
libnvdimm-y += region.o
|
||||
libnvdimm-y += namespace_devs.o
|
||||
libnvdimm-y += label.o
|
||||
libnvdimm-y += badrange.o
|
||||
libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
|
||||
libnvdimm-$(CONFIG_BTT) += btt_devs.o
|
||||
libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
|
||||
|
293
drivers/nvdimm/badrange.c
Normal file
293
drivers/nvdimm/badrange.c
Normal file
@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright(c) 2017 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/libnvdimm.h>
|
||||
#include <linux/badblocks.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/ndctl.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/io.h>
|
||||
#include "nd-core.h"
|
||||
#include "nd.h"
|
||||
|
||||
void badrange_init(struct badrange *badrange)
|
||||
{
|
||||
INIT_LIST_HEAD(&badrange->list);
|
||||
spin_lock_init(&badrange->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(badrange_init);
|
||||
|
||||
/*
 * Fill in @bre with (@addr, @length) and queue it at the tail of the
 * badrange list. The caller must already hold badrange->lock.
 */
static void append_badrange_entry(struct badrange *badrange,
		struct badrange_entry *bre, u64 addr, u64 length)
{
	lockdep_assert_held(&badrange->lock);

	bre->length = length;
	bre->start = addr;
	list_add_tail(&bre->list, &badrange->list);
}
|
||||
|
||||
/*
 * Allocate a zeroed entry with the given gfp @flags and append it to the
 * badrange list. Returns 0 on success, -ENOMEM if the allocation fails
 * (in which case the list is left untouched).
 */
static int alloc_and_append_badrange_entry(struct badrange *badrange,
		u64 addr, u64 length, gfp_t flags)
{
	struct badrange_entry *entry = kzalloc(sizeof(*entry), flags);

	if (entry) {
		append_badrange_entry(badrange, entry, addr, length);
		return 0;
	}

	return -ENOMEM;
}
|
||||
|
||||
/*
 * Record the range (@addr, @length) in @badrange.
 *
 * Entered with badrange->lock held; the lock is dropped around the
 * GFP_KERNEL allocation and re-taken before the list is examined.
 *
 * Returns 0 when an entry was added or an existing entry with the same
 * start address was updated, -ENOMEM when a new entry was needed but the
 * allocation failed.
 */
static int add_badrange(struct badrange *badrange, u64 addr, u64 length)
{
	struct badrange_entry *cur, *fresh;

	spin_unlock(&badrange->lock);
	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	spin_lock(&badrange->lock);

	/*
	 * There is a chance this is a duplicate, check for those first.
	 * This will be the common case as ARS_STATUS returns all known
	 * errors in the SPA space, and we can't query it per region
	 */
	list_for_each_entry(cur, &badrange->list, list) {
		if (cur->start != addr)
			continue;

		/* Same start address: refresh the length, drop the spare */
		cur->length = length;
		kfree(fresh);
		return 0;
	}

	/*
	 * If not a duplicate or a simple length update, add the entry as is,
	 * as any overlapping ranges will get resolved when the list is consumed
	 * and converted to badblocks. An empty list also ends up here.
	 */
	if (!fresh)
		return -ENOMEM;

	append_badrange_entry(badrange, fresh, addr, length);
	return 0;
}
|
||||
|
||||
/**
 * badrange_add() - record a bad address range
 * @badrange: tracker to update
 * @addr: start address of the range
 * @length: length of the range
 *
 * Locked wrapper around add_badrange(). Returns whatever add_badrange()
 * returns: 0 on success or -ENOMEM.
 */
int badrange_add(struct badrange *badrange, u64 addr, u64 length)
{
	int ret;

	spin_lock(&badrange->lock);
	ret = add_badrange(badrange, addr, length);
	spin_unlock(&badrange->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(badrange_add);
|
||||
|
||||
/**
 * badrange_forget() - drop a cleared interval from the badrange list
 * @badrange: tracker to update
 * @start: first address of the interval that was cleared
 * @len: number of bytes cleared
 *
 * Walks every entry under badrange->lock and removes the part of it that
 * intersects [@start, @start + @len - 1]: the entry is deleted, trimmed at
 * its head, trimmed at its tail, or split in two. A zero @len is a no-op.
 */
void badrange_forget(struct badrange *badrange, phys_addr_t start,
		unsigned int len)
{
	struct list_head *badrange_list = &badrange->list;
	struct badrange_entry *bre, *next;
	u64 clr_end;

	/*
	 * Nothing was cleared. Bail out before computing clr_end, which
	 * would otherwise be start - 1 and wrap around when start is 0.
	 */
	if (!len)
		return;

	clr_end = start + len - 1;

	spin_lock(&badrange->lock);

	/*
	 * [start, clr_end] is the badrange interval being cleared.
	 * [bre->start, bre_end] is the badrange_list entry we're comparing
	 * the above interval against. The badrange list entry may need
	 * to be modified (update either start or length), deleted, or
	 * split into two based on the overlap characteristics
	 */

	list_for_each_entry_safe(bre, next, badrange_list, list) {
		u64 bre_end = bre->start + bre->length - 1;

		/* Skip intervals with no intersection */
		if (bre_end < start)
			continue;
		if (bre->start > clr_end)
			continue;
		/* Delete completely overlapped badrange entries */
		if ((bre->start >= start) && (bre_end <= clr_end)) {
			list_del(&bre->list);
			kfree(bre);
			continue;
		}
		/*
		 * Adjust start point of partially cleared entries. The
		 * comparison is inclusive so that clearing exactly the
		 * first byte of an entry (clr_end == bre->start) trims it
		 * instead of falling through every case unchanged.
		 */
		if ((start <= bre->start) && (clr_end >= bre->start)) {
			bre->length -= clr_end - bre->start + 1;
			bre->start = clr_end + 1;
			continue;
		}
		/* Adjust bre->length for partial clearing at the tail end */
		if ((bre->start < start) && (bre_end <= clr_end)) {
			/* bre->start remains the same */
			bre->length = start - bre->start;
			continue;
		}
		/*
		 * If clearing in the middle of an entry, we split it into
		 * two by modifying the current entry to represent one half of
		 * the split, and adding a new entry for the second half.
		 */
		if ((bre->start < start) && (bre_end > clr_end)) {
			u64 new_start = clr_end + 1;
			u64 new_len = bre_end - new_start + 1;

			/*
			 * Add new entry covering the right half. The lock is
			 * held, hence GFP_NOWAIT; the result is not checked,
			 * so on allocation failure the right half is simply
			 * no longer tracked.
			 */
			alloc_and_append_badrange_entry(badrange, new_start,
					new_len, GFP_NOWAIT);
			/* Adjust this entry to cover the left half */
			bre->length = start - bre->start;
			continue;
		}
	}
	spin_unlock(&badrange->lock);
}
EXPORT_SYMBOL_GPL(badrange_forget);
|
||||
|
||||
/*
 * Mark @num sectors starting at sector @s as bad in @bb, logging the
 * range (in bytes) at debug level first.
 */
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
	dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n",
			(u64) s * 512, (u64) num * 512);

	if (!badblocks_set(bb, s, num, 1))
		return;

	/* this isn't an error as the hardware will still throw an exception */
	dev_info_once(bb->dev, "%s: failed for sector %llx\n",
			__func__, (u64) s);
}
|
||||
|
||||
/**
|
||||
* __add_badblock_range() - Convert a physical address range to bad sectors
|
||||
* @bb: badblocks instance to populate
|
||||
* @ns_offset: namespace offset where the error range begins (in bytes)
|
||||
* @len: number of bytes of badrange to be added
|
||||
*
|
||||
* This assumes that the range provided with (ns_offset, len) is within
|
||||
* the bounds of physical addresses for this namespace, i.e. lies in the
|
||||
* interval [ns_start, ns_start + ns_size)
|
||||
*/
|
||||
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
|
||||
{
|
||||
const unsigned int sector_size = 512;
|
||||
sector_t start_sector, end_sector;
|
||||
u64 num_sectors;
|
||||
u32 rem;
|
||||
|
||||
start_sector = div_u64(ns_offset, sector_size);
|
||||
end_sector = div_u64_rem(ns_offset + len, sector_size, &rem);
|
||||
if (rem)
|
||||
end_sector++;
|
||||
num_sectors = end_sector - start_sector;
|
||||
|
||||
if (unlikely(num_sectors > (u64)INT_MAX)) {
|
||||
u64 remaining = num_sectors;
|
||||
sector_t s = start_sector;
|
||||
|
||||
while (remaining) {
|
||||
int done = min_t(u64, remaining, INT_MAX);
|
||||
|
||||
set_badblock(bb, s, done);
|
||||
remaining -= done;
|
||||
s += done;
|
||||
}
|
||||
} else
|
||||
set_badblock(bb, start_sector, num_sectors);
|
||||
}
|
||||
|
||||
static void badblocks_populate(struct badrange *badrange,
|
||||
struct badblocks *bb, const struct resource *res)
|
||||
{
|
||||
struct badrange_entry *bre;
|
||||
|
||||
if (list_empty(&badrange->list))
|
||||
return;
|
||||
|
||||
list_for_each_entry(bre, &badrange->list, list) {
|
||||
u64 bre_end = bre->start + bre->length - 1;
|
||||
|
||||
/* Discard intervals with no intersection */
|
||||
if (bre_end < res->start)
|
||||
continue;
|
||||
if (bre->start > res->end)
|
||||
continue;
|
||||
/* Deal with any overlap after start of the namespace */
|
||||
if (bre->start >= res->start) {
|
||||
u64 start = bre->start;
|
||||
u64 len;
|
||||
|
||||
if (bre_end <= res->end)
|
||||
len = bre->length;
|
||||
else
|
||||
len = res->start + resource_size(res)
|
||||
- bre->start;
|
||||
__add_badblock_range(bb, start - res->start, len);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Deal with overlap for badrange starting before
|
||||
* the namespace.
|
||||
*/
|
||||
if (bre->start < res->start) {
|
||||
u64 len;
|
||||
|
||||
if (bre_end < res->end)
|
||||
len = bre->start + bre->length - res->start;
|
||||
else
|
||||
len = resource_size(res);
|
||||
__add_badblock_range(bb, 0, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* nvdimm_badblocks_populate() - Convert a list of badranges to badblocks
|
||||
* @region: parent region of the range to interrogate
|
||||
* @bb: badblocks instance to populate
|
||||
* @res: resource range to consider
|
||||
*
|
||||
* The badrange list generated during bus initialization may contain
|
||||
* multiple, possibly overlapping physical address ranges. Compare each
|
||||
* of these ranges to the resource range currently being initialized,
|
||||
* and add badblocks entries for all matching sub-ranges
|
||||
*/
|
||||
void nvdimm_badblocks_populate(struct nd_region *nd_region,
|
||||
struct badblocks *bb, const struct resource *res)
|
||||
{
|
||||
struct nvdimm_bus *nvdimm_bus;
|
||||
|
||||
if (!is_memory(&nd_region->dev)) {
|
||||
dev_WARN_ONCE(&nd_region->dev, 1,
|
||||
"%s only valid for pmem regions\n", __func__);
|
||||
return;
|
||||
}
|
||||
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
|
||||
|
||||
nvdimm_bus_lock(&nvdimm_bus->dev);
|
||||
badblocks_populate(&nvdimm_bus->badrange, bb, res);
|
||||
nvdimm_bus_unlock(&nvdimm_bus->dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
|
@ -11,6 +11,7 @@
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
#include <linux/libnvdimm.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -221,7 +222,7 @@ static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
|
||||
phys_addr_t phys, u64 cleared)
|
||||
{
|
||||
if (cleared > 0)
|
||||
nvdimm_forget_poison(nvdimm_bus, phys, cleared);
|
||||
badrange_forget(&nvdimm_bus->badrange, phys, cleared);
|
||||
|
||||
if (cleared > 0 && cleared / 512)
|
||||
nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
|
||||
@ -344,11 +345,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
|
||||
return NULL;
|
||||
INIT_LIST_HEAD(&nvdimm_bus->list);
|
||||
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
|
||||
INIT_LIST_HEAD(&nvdimm_bus->poison_list);
|
||||
init_waitqueue_head(&nvdimm_bus->probe_wait);
|
||||
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
|
||||
mutex_init(&nvdimm_bus->reconfig_mutex);
|
||||
spin_lock_init(&nvdimm_bus->poison_lock);
|
||||
badrange_init(&nvdimm_bus->badrange);
|
||||
if (nvdimm_bus->id < 0) {
|
||||
kfree(nvdimm_bus);
|
||||
return NULL;
|
||||
@ -395,15 +395,15 @@ static int child_unregister(struct device *dev, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_poison_list(struct list_head *poison_list)
|
||||
static void free_badrange_list(struct list_head *badrange_list)
|
||||
{
|
||||
struct nd_poison *pl, *next;
|
||||
struct badrange_entry *bre, *next;
|
||||
|
||||
list_for_each_entry_safe(pl, next, poison_list, list) {
|
||||
list_del(&pl->list);
|
||||
kfree(pl);
|
||||
list_for_each_entry_safe(bre, next, badrange_list, list) {
|
||||
list_del(&bre->list);
|
||||
kfree(bre);
|
||||
}
|
||||
list_del_init(poison_list);
|
||||
list_del_init(badrange_list);
|
||||
}
|
||||
|
||||
static int nd_bus_remove(struct device *dev)
|
||||
@ -417,9 +417,9 @@ static int nd_bus_remove(struct device *dev)
|
||||
nd_synchronize();
|
||||
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
|
||||
|
||||
spin_lock(&nvdimm_bus->poison_lock);
|
||||
free_poison_list(&nvdimm_bus->poison_list);
|
||||
spin_unlock(&nvdimm_bus->poison_lock);
|
||||
spin_lock(&nvdimm_bus->badrange.lock);
|
||||
free_badrange_list(&nvdimm_bus->badrange.list);
|
||||
spin_unlock(&nvdimm_bus->badrange.lock);
|
||||
|
||||
nvdimm_bus_destroy_ndctl(nvdimm_bus);
|
||||
|
||||
|
@ -398,265 +398,11 @@ struct attribute_group nvdimm_bus_attribute_group = {
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
|
||||
|
||||
static void set_badblock(struct badblocks *bb, sector_t s, int num)
|
||||
int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
|
||||
{
|
||||
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
|
||||
(u64) s * 512, (u64) num * 512);
|
||||
/* this isn't an error as the hardware will still throw an exception */
|
||||
if (badblocks_set(bb, s, num, 1))
|
||||
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
|
||||
__func__, (u64) s);
|
||||
return badrange_add(&nvdimm_bus->badrange, addr, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* __add_badblock_range() - Convert a physical address range to bad sectors
|
||||
* @bb: badblocks instance to populate
|
||||
* @ns_offset: namespace offset where the error range begins (in bytes)
|
||||
* @len: number of bytes of poison to be added
|
||||
*
|
||||
* This assumes that the range provided with (ns_offset, len) is within
|
||||
* the bounds of physical addresses for this namespace, i.e. lies in the
|
||||
* interval [ns_start, ns_start + ns_size)
|
||||
*/
|
||||
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
|
||||
{
|
||||
const unsigned int sector_size = 512;
|
||||
sector_t start_sector, end_sector;
|
||||
u64 num_sectors;
|
||||
u32 rem;
|
||||
|
||||
start_sector = div_u64(ns_offset, sector_size);
|
||||
end_sector = div_u64_rem(ns_offset + len, sector_size, &rem);
|
||||
if (rem)
|
||||
end_sector++;
|
||||
num_sectors = end_sector - start_sector;
|
||||
|
||||
if (unlikely(num_sectors > (u64)INT_MAX)) {
|
||||
u64 remaining = num_sectors;
|
||||
sector_t s = start_sector;
|
||||
|
||||
while (remaining) {
|
||||
int done = min_t(u64, remaining, INT_MAX);
|
||||
|
||||
set_badblock(bb, s, done);
|
||||
remaining -= done;
|
||||
s += done;
|
||||
}
|
||||
} else
|
||||
set_badblock(bb, start_sector, num_sectors);
|
||||
}
|
||||
|
||||
static void badblocks_populate(struct list_head *poison_list,
|
||||
struct badblocks *bb, const struct resource *res)
|
||||
{
|
||||
struct nd_poison *pl;
|
||||
|
||||
if (list_empty(poison_list))
|
||||
return;
|
||||
|
||||
list_for_each_entry(pl, poison_list, list) {
|
||||
u64 pl_end = pl->start + pl->length - 1;
|
||||
|
||||
/* Discard intervals with no intersection */
|
||||
if (pl_end < res->start)
|
||||
continue;
|
||||
if (pl->start > res->end)
|
||||
continue;
|
||||
/* Deal with any overlap after start of the namespace */
|
||||
if (pl->start >= res->start) {
|
||||
u64 start = pl->start;
|
||||
u64 len;
|
||||
|
||||
if (pl_end <= res->end)
|
||||
len = pl->length;
|
||||
else
|
||||
len = res->start + resource_size(res)
|
||||
- pl->start;
|
||||
__add_badblock_range(bb, start - res->start, len);
|
||||
continue;
|
||||
}
|
||||
/* Deal with overlap for poison starting before the namespace */
|
||||
if (pl->start < res->start) {
|
||||
u64 len;
|
||||
|
||||
if (pl_end < res->end)
|
||||
len = pl->start + pl->length - res->start;
|
||||
else
|
||||
len = resource_size(res);
|
||||
__add_badblock_range(bb, 0, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks
|
||||
* @region: parent region of the range to interrogate
|
||||
* @bb: badblocks instance to populate
|
||||
* @res: resource range to consider
|
||||
*
|
||||
* The poison list generated during bus initialization may contain
|
||||
* multiple, possibly overlapping physical address ranges. Compare each
|
||||
* of these ranges to the resource range currently being initialized,
|
||||
* and add badblocks entries for all matching sub-ranges
|
||||
*/
|
||||
void nvdimm_badblocks_populate(struct nd_region *nd_region,
|
||||
struct badblocks *bb, const struct resource *res)
|
||||
{
|
||||
struct nvdimm_bus *nvdimm_bus;
|
||||
struct list_head *poison_list;
|
||||
|
||||
if (!is_memory(&nd_region->dev)) {
|
||||
dev_WARN_ONCE(&nd_region->dev, 1,
|
||||
"%s only valid for pmem regions\n", __func__);
|
||||
return;
|
||||
}
|
||||
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
|
||||
poison_list = &nvdimm_bus->poison_list;
|
||||
|
||||
nvdimm_bus_lock(&nvdimm_bus->dev);
|
||||
badblocks_populate(poison_list, bb, res);
|
||||
nvdimm_bus_unlock(&nvdimm_bus->dev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
|
||||
|
||||
static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
|
||||
struct nd_poison *pl, u64 addr, u64 length)
|
||||
{
|
||||
lockdep_assert_held(&nvdimm_bus->poison_lock);
|
||||
pl->start = addr;
|
||||
pl->length = length;
|
||||
list_add_tail(&pl->list, &nvdimm_bus->poison_list);
|
||||
}
|
||||
|
||||
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
|
||||
gfp_t flags)
|
||||
{
|
||||
struct nd_poison *pl;
|
||||
|
||||
pl = kzalloc(sizeof(*pl), flags);
|
||||
if (!pl)
|
||||
return -ENOMEM;
|
||||
|
||||
append_poison_entry(nvdimm_bus, pl, addr, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
|
||||
{
|
||||
struct nd_poison *pl, *pl_new;
|
||||
|
||||
spin_unlock(&nvdimm_bus->poison_lock);
|
||||
pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL);
|
||||
spin_lock(&nvdimm_bus->poison_lock);
|
||||
|
||||
if (list_empty(&nvdimm_bus->poison_list)) {
|
||||
if (!pl_new)
|
||||
return -ENOMEM;
|
||||
append_poison_entry(nvdimm_bus, pl_new, addr, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* There is a chance this is a duplicate, check for those first.
|
||||
* This will be the common case as ARS_STATUS returns all known
|
||||
* errors in the SPA space, and we can't query it per region
|
||||
*/
|
||||
list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
|
||||
if (pl->start == addr) {
|
||||
/* If length has changed, update this list entry */
|
||||
if (pl->length != length)
|
||||
pl->length = length;
|
||||
kfree(pl_new);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If not a duplicate or a simple length update, add the entry as is,
|
||||
* as any overlapping ranges will get resolved when the list is consumed
|
||||
* and converted to badblocks
|
||||
*/
|
||||
if (!pl_new)
|
||||
return -ENOMEM;
|
||||
append_poison_entry(nvdimm_bus, pl_new, addr, length);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
|
||||
{
|
||||
int rc;
|
||||
|
||||
spin_lock(&nvdimm_bus->poison_lock);
|
||||
rc = bus_add_poison(nvdimm_bus, addr, length);
|
||||
spin_unlock(&nvdimm_bus->poison_lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
|
||||
|
||||
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
|
||||
unsigned int len)
|
||||
{
|
||||
struct list_head *poison_list = &nvdimm_bus->poison_list;
|
||||
u64 clr_end = start + len - 1;
|
||||
struct nd_poison *pl, *next;
|
||||
|
||||
spin_lock(&nvdimm_bus->poison_lock);
|
||||
WARN_ON_ONCE(list_empty(poison_list));
|
||||
|
||||
/*
|
||||
* [start, clr_end] is the poison interval being cleared.
|
||||
* [pl->start, pl_end] is the poison_list entry we're comparing
|
||||
* the above interval against. The poison list entry may need
|
||||
* to be modified (update either start or length), deleted, or
|
||||
* split into two based on the overlap characteristics
|
||||
*/
|
||||
|
||||
list_for_each_entry_safe(pl, next, poison_list, list) {
|
||||
u64 pl_end = pl->start + pl->length - 1;
|
||||
|
||||
/* Skip intervals with no intersection */
|
||||
if (pl_end < start)
|
||||
continue;
|
||||
if (pl->start > clr_end)
|
||||
continue;
|
||||
/* Delete completely overlapped poison entries */
|
||||
if ((pl->start >= start) && (pl_end <= clr_end)) {
|
||||
list_del(&pl->list);
|
||||
kfree(pl);
|
||||
continue;
|
||||
}
|
||||
/* Adjust start point of partially cleared entries */
|
||||
if ((start <= pl->start) && (clr_end > pl->start)) {
|
||||
pl->length -= clr_end - pl->start + 1;
|
||||
pl->start = clr_end + 1;
|
||||
continue;
|
||||
}
|
||||
/* Adjust pl->length for partial clearing at the tail end */
|
||||
if ((pl->start < start) && (pl_end <= clr_end)) {
|
||||
/* pl->start remains the same */
|
||||
pl->length = start - pl->start;
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* If clearing in the middle of an entry, we split it into
|
||||
* two by modifying the current entry to represent one half of
|
||||
* the split, and adding a new entry for the second half.
|
||||
*/
|
||||
if ((pl->start < start) && (pl_end > clr_end)) {
|
||||
u64 new_start = clr_end + 1;
|
||||
u64 new_len = pl_end - new_start + 1;
|
||||
|
||||
/* Add new entry covering the right half */
|
||||
add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
|
||||
/* Adjust this entry to cover the left half */
|
||||
pl->length = start - pl->start;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
spin_unlock(&nvdimm_bus->poison_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_forget_poison);
|
||||
EXPORT_SYMBOL_GPL(nvdimm_bus_add_badrange);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
|
||||
|
@ -55,6 +55,8 @@ static int nvdimm_probe(struct device *dev)
|
||||
goto err;
|
||||
|
||||
rc = nvdimm_init_config_data(ndd);
|
||||
if (rc == -EACCES)
|
||||
nvdimm_set_locked(dev);
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
@ -68,6 +70,7 @@ static int nvdimm_probe(struct device *dev)
|
||||
rc = nd_label_reserve_dpa(ndd);
|
||||
if (ndd->ns_current >= 0)
|
||||
nvdimm_set_aliasing(dev);
|
||||
nvdimm_clear_locked(dev);
|
||||
nvdimm_bus_unlock(dev);
|
||||
|
||||
if (rc)
|
||||
|
@ -200,6 +200,13 @@ void nvdimm_set_locked(struct device *dev)
|
||||
set_bit(NDD_LOCKED, &nvdimm->flags);
|
||||
}
|
||||
|
||||
void nvdimm_clear_locked(struct device *dev)
|
||||
{
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
|
||||
clear_bit(NDD_LOCKED, &nvdimm->flags);
|
||||
}
|
||||
|
||||
static void nvdimm_release(struct device *dev)
|
||||
{
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
@ -324,6 +331,17 @@ static ssize_t commands_show(struct device *dev,
|
||||
}
|
||||
static DEVICE_ATTR_RO(commands);
|
||||
|
||||
static ssize_t flags_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
|
||||
return sprintf(buf, "%s%s\n",
|
||||
test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
|
||||
test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
|
||||
}
|
||||
static DEVICE_ATTR_RO(flags);
|
||||
|
||||
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
@ -365,6 +383,7 @@ static DEVICE_ATTR_RO(available_slots);
|
||||
|
||||
static struct attribute *nvdimm_attributes[] = {
|
||||
&dev_attr_state.attr,
|
||||
&dev_attr_flags.attr,
|
||||
&dev_attr_commands.attr,
|
||||
&dev_attr_available_slots.attr,
|
||||
NULL,
|
||||
|
@ -1050,7 +1050,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
|
||||
nsindex = to_namespace_index(ndd, 0);
|
||||
memset(nsindex, 0, ndd->nsarea.config_size);
|
||||
for (i = 0; i < 2; i++) {
|
||||
int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT);
|
||||
int rc = nd_label_write_index(ndd, i, 3 - i, ND_NSINDEX_INIT);
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
|
@ -1620,7 +1620,7 @@ static umode_t namespace_visible(struct kobject *kobj,
|
||||
if (a == &dev_attr_resource.attr) {
|
||||
if (is_namespace_blk(dev))
|
||||
return 0;
|
||||
return a->mode;
|
||||
return 0400;
|
||||
}
|
||||
|
||||
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
|
||||
@ -1875,7 +1875,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
|
||||
* @nspm: target namespace to create
|
||||
* @nd_label: target pmem namespace label to evaluate
|
||||
*/
|
||||
struct device *create_namespace_pmem(struct nd_region *nd_region,
|
||||
static struct device *create_namespace_pmem(struct nd_region *nd_region,
|
||||
struct nd_namespace_index *nsindex,
|
||||
struct nd_namespace_label *nd_label)
|
||||
{
|
||||
@ -2186,7 +2186,7 @@ static int add_namespace_resource(struct nd_region *nd_region,
|
||||
return i;
|
||||
}
|
||||
|
||||
struct device *create_namespace_blk(struct nd_region *nd_region,
|
||||
static struct device *create_namespace_blk(struct nd_region *nd_region,
|
||||
struct nd_namespace_label *nd_label, int count)
|
||||
{
|
||||
|
||||
|
@ -29,10 +29,9 @@ struct nvdimm_bus {
|
||||
struct list_head list;
|
||||
struct device dev;
|
||||
int id, probe_active;
|
||||
struct list_head poison_list;
|
||||
struct list_head mapping_list;
|
||||
struct mutex reconfig_mutex;
|
||||
spinlock_t poison_lock;
|
||||
struct badrange badrange;
|
||||
};
|
||||
|
||||
struct nvdimm {
|
||||
|
@ -34,12 +34,6 @@ enum {
|
||||
NVDIMM_IO_ATOMIC = 1,
|
||||
};
|
||||
|
||||
struct nd_poison {
|
||||
u64 start;
|
||||
u64 length;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct nvdimm_drvdata {
|
||||
struct device *dev;
|
||||
int nslabel_size;
|
||||
@ -254,6 +248,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
|
||||
unsigned int len);
|
||||
void nvdimm_set_aliasing(struct device *dev);
|
||||
void nvdimm_set_locked(struct device *dev);
|
||||
void nvdimm_clear_locked(struct device *dev);
|
||||
struct nd_btt *to_nd_btt(struct device *dev);
|
||||
|
||||
struct nd_gen_sb {
|
||||
|
@ -282,8 +282,16 @@ static struct attribute *nd_pfn_attributes[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n)
|
||||
{
|
||||
if (a == &dev_attr_resource.attr)
|
||||
return 0400;
|
||||
return a->mode;
|
||||
}
|
||||
|
||||
struct attribute_group nd_pfn_attribute_group = {
|
||||
.attrs = nd_pfn_attributes,
|
||||
.is_visible = pfn_visible,
|
||||
};
|
||||
|
||||
static const struct attribute_group *nd_pfn_attribute_groups[] = {
|
||||
|
@ -562,8 +562,12 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
|
||||
if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
|
||||
return 0;
|
||||
|
||||
if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr)
|
||||
return 0;
|
||||
if (a == &dev_attr_resource.attr) {
|
||||
if (is_nd_pmem(dev))
|
||||
return 0400;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (a == &dev_attr_deep_flush.attr) {
|
||||
int has_flush = nvdimm_has_flush(nd_region);
|
||||
|
323
fs/dax.c
323
fs/dax.c
@ -526,13 +526,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
|
||||
static void *dax_insert_mapping_entry(struct address_space *mapping,
|
||||
struct vm_fault *vmf,
|
||||
void *entry, sector_t sector,
|
||||
unsigned long flags)
|
||||
unsigned long flags, bool dirty)
|
||||
{
|
||||
struct radix_tree_root *page_tree = &mapping->page_tree;
|
||||
void *new_entry;
|
||||
pgoff_t index = vmf->pgoff;
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
if (dirty)
|
||||
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
|
||||
|
||||
if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
|
||||
@ -569,7 +569,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
|
||||
entry = new_entry;
|
||||
}
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
if (dirty)
|
||||
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
|
||||
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
@ -825,38 +825,42 @@ int dax_writeback_mapping_range(struct address_space *mapping,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
|
||||
|
||||
static int dax_insert_mapping(struct address_space *mapping,
|
||||
struct block_device *bdev, struct dax_device *dax_dev,
|
||||
sector_t sector, size_t size, void *entry,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
unsigned long vaddr = vmf->address;
|
||||
void *ret, *kaddr;
|
||||
pgoff_t pgoff;
|
||||
int id, rc;
|
||||
pfn_t pfn;
|
||||
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
|
||||
}
|
||||
|
||||
rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
|
||||
static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
|
||||
pfn_t *pfnp)
|
||||
{
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
pgoff_t pgoff;
|
||||
void *kaddr;
|
||||
int id, rc;
|
||||
long length;
|
||||
|
||||
rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
id = dax_read_lock();
|
||||
rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
|
||||
if (rc < 0) {
|
||||
dax_read_unlock(id);
|
||||
return rc;
|
||||
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
|
||||
&kaddr, pfnp);
|
||||
if (length < 0) {
|
||||
rc = length;
|
||||
goto out;
|
||||
}
|
||||
rc = -EINVAL;
|
||||
if (PFN_PHYS(length) < size)
|
||||
goto out;
|
||||
if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
|
||||
goto out;
|
||||
/* For larger pages we need devmap */
|
||||
if (length > 1 && !pfn_t_devmap(*pfnp))
|
||||
goto out;
|
||||
rc = 0;
|
||||
out:
|
||||
dax_read_unlock(id);
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0);
|
||||
if (IS_ERR(ret))
|
||||
return PTR_ERR(ret);
|
||||
|
||||
trace_dax_insert_mapping(mapping->host, vmf, ret);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
return vm_insert_mixed_mkwrite(vma, vaddr, pfn);
|
||||
else
|
||||
return vm_insert_mixed(vma, vaddr, pfn);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -882,7 +886,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
|
||||
}
|
||||
|
||||
entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
|
||||
RADIX_DAX_ZERO_PAGE);
|
||||
RADIX_DAX_ZERO_PAGE, false);
|
||||
if (IS_ERR(entry2)) {
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
@ -941,11 +945,6 @@ int __dax_zero_page_range(struct block_device *bdev,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
|
||||
|
||||
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
|
||||
}
|
||||
|
||||
static loff_t
|
||||
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||
struct iomap *iomap)
|
||||
@ -1085,19 +1084,33 @@ static int dax_fault_return(int error)
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
/*
|
||||
* MAP_SYNC on a dax mapping guarantees dirty metadata is
|
||||
* flushed on write-faults (non-cow), but not read-faults.
|
||||
*/
|
||||
static bool dax_fault_is_synchronous(unsigned long flags,
|
||||
struct vm_area_struct *vma, struct iomap *iomap)
|
||||
{
|
||||
return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
|
||||
&& (iomap->flags & IOMAP_F_DIRTY);
|
||||
}
|
||||
|
||||
static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned long vaddr = vmf->address;
|
||||
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
|
||||
sector_t sector;
|
||||
struct iomap iomap = { 0 };
|
||||
unsigned flags = IOMAP_FAULT;
|
||||
int error, major = 0;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool sync;
|
||||
int vmf_ret = 0;
|
||||
void *entry;
|
||||
pfn_t pfn;
|
||||
|
||||
trace_dax_pte_fault(inode, vmf, vmf_ret);
|
||||
/*
|
||||
@ -1110,7 +1123,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
|
||||
if (write && !vmf->cow_page)
|
||||
flags |= IOMAP_WRITE;
|
||||
|
||||
entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
|
||||
@ -1145,9 +1158,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
|
||||
sector = dax_iomap_sector(&iomap, pos);
|
||||
|
||||
if (vmf->cow_page) {
|
||||
sector_t sector = dax_iomap_sector(&iomap, pos);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_HOLE:
|
||||
case IOMAP_UNWRITTEN:
|
||||
@ -1173,22 +1186,55 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto finish_iomap;
|
||||
}
|
||||
|
||||
sync = dax_fault_is_synchronous(flags, vma, &iomap);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_MAPPED:
|
||||
if (iomap.flags & IOMAP_F_NEW) {
|
||||
count_vm_event(PGMAJFAULT);
|
||||
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
|
||||
major = VM_FAULT_MAJOR;
|
||||
}
|
||||
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
|
||||
sector, PAGE_SIZE, entry, vmf->vma, vmf);
|
||||
error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
|
||||
if (error < 0)
|
||||
goto error_finish_iomap;
|
||||
|
||||
entry = dax_insert_mapping_entry(mapping, vmf, entry,
|
||||
dax_iomap_sector(&iomap, pos),
|
||||
0, write && !sync);
|
||||
if (IS_ERR(entry)) {
|
||||
error = PTR_ERR(entry);
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are doing synchronous page fault and inode needs fsync,
|
||||
* we can insert PTE into page tables only after that happens.
|
||||
* Skip insertion for now and return the pfn so that caller can
|
||||
* insert it after fsync is done.
|
||||
*/
|
||||
if (sync) {
|
||||
if (WARN_ON_ONCE(!pfnp)) {
|
||||
error = -EIO;
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
*pfnp = pfn;
|
||||
vmf_ret = VM_FAULT_NEEDDSYNC | major;
|
||||
goto finish_iomap;
|
||||
}
|
||||
trace_dax_insert_mapping(inode, vmf, entry);
|
||||
if (write)
|
||||
error = vm_insert_mixed_mkwrite(vma, vaddr, pfn);
|
||||
else
|
||||
error = vm_insert_mixed(vma, vaddr, pfn);
|
||||
|
||||
/* -EBUSY is fine, somebody else faulted on the same PTE */
|
||||
if (error == -EBUSY)
|
||||
error = 0;
|
||||
break;
|
||||
case IOMAP_UNWRITTEN:
|
||||
case IOMAP_HOLE:
|
||||
if (!(vmf->flags & FAULT_FLAG_WRITE)) {
|
||||
if (!write) {
|
||||
vmf_ret = dax_load_hole(mapping, entry, vmf);
|
||||
goto finish_iomap;
|
||||
}
|
||||
@ -1223,53 +1269,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX_PMD
|
||||
static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
|
||||
loff_t pos, void *entry)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
struct dax_device *dax_dev = iomap->dax_dev;
|
||||
struct block_device *bdev = iomap->bdev;
|
||||
struct inode *inode = mapping->host;
|
||||
const size_t size = PMD_SIZE;
|
||||
void *ret = NULL, *kaddr;
|
||||
long length = 0;
|
||||
pgoff_t pgoff;
|
||||
pfn_t pfn = {};
|
||||
int id;
|
||||
|
||||
if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0)
|
||||
goto fallback;
|
||||
|
||||
id = dax_read_lock();
|
||||
length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
|
||||
if (length < 0)
|
||||
goto unlock_fallback;
|
||||
length = PFN_PHYS(length);
|
||||
|
||||
if (length < size)
|
||||
goto unlock_fallback;
|
||||
if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR)
|
||||
goto unlock_fallback;
|
||||
if (!pfn_t_devmap(pfn))
|
||||
goto unlock_fallback;
|
||||
dax_read_unlock(id);
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, sector,
|
||||
RADIX_DAX_PMD);
|
||||
if (IS_ERR(ret))
|
||||
goto fallback;
|
||||
|
||||
trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret);
|
||||
return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
|
||||
pfn, vmf->flags & FAULT_FLAG_WRITE);
|
||||
|
||||
unlock_fallback:
|
||||
dax_read_unlock(id);
|
||||
fallback:
|
||||
trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret);
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
/*
|
||||
* The 'colour' (ie low bits) within a PMD of a page offset. This comes up
|
||||
* more often than one might expect in the below functions.
|
||||
*/
|
||||
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
|
||||
|
||||
static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
|
||||
void *entry)
|
||||
@ -1288,7 +1292,7 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
|
||||
goto fallback;
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
|
||||
RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE);
|
||||
RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
|
||||
if (IS_ERR(ret))
|
||||
goto fallback;
|
||||
|
||||
@ -1310,13 +1314,14 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
unsigned long pmd_addr = vmf->address & PMD_MASK;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool sync;
|
||||
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
|
||||
struct inode *inode = mapping->host;
|
||||
int result = VM_FAULT_FALLBACK;
|
||||
@ -1325,6 +1330,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
void *entry;
|
||||
loff_t pos;
|
||||
int error;
|
||||
pfn_t pfn;
|
||||
|
||||
/*
|
||||
* Check whether offset isn't beyond end of file now. Caller is
|
||||
@ -1332,7 +1338,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
* this is a reliable test.
|
||||
*/
|
||||
pgoff = linear_page_index(vma, pmd_addr);
|
||||
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
|
||||
max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
|
||||
|
||||
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
|
||||
|
||||
@ -1356,13 +1362,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
|
||||
goto fallback;
|
||||
|
||||
if (pgoff > max_pgoff) {
|
||||
if (pgoff >= max_pgoff) {
|
||||
result = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If the PMD would extend beyond the file size */
|
||||
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
|
||||
if ((pgoff | PG_PMD_COLOUR) >= max_pgoff)
|
||||
goto fallback;
|
||||
|
||||
/*
|
||||
@ -1400,9 +1406,37 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
if (iomap.offset + iomap.length < pos + PMD_SIZE)
|
||||
goto finish_iomap;
|
||||
|
||||
sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_MAPPED:
|
||||
result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry);
|
||||
error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
|
||||
if (error < 0)
|
||||
goto finish_iomap;
|
||||
|
||||
entry = dax_insert_mapping_entry(mapping, vmf, entry,
|
||||
dax_iomap_sector(&iomap, pos),
|
||||
RADIX_DAX_PMD, write && !sync);
|
||||
if (IS_ERR(entry))
|
||||
goto finish_iomap;
|
||||
|
||||
/*
|
||||
* If we are doing synchronous page fault and inode needs fsync,
|
||||
* we can insert PMD into page tables only after that happens.
|
||||
* Skip insertion for now and return the pfn so that caller can
|
||||
* insert it after fsync is done.
|
||||
*/
|
||||
if (sync) {
|
||||
if (WARN_ON_ONCE(!pfnp))
|
||||
goto finish_iomap;
|
||||
*pfnp = pfn;
|
||||
result = VM_FAULT_NEEDDSYNC;
|
||||
goto finish_iomap;
|
||||
}
|
||||
|
||||
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
|
||||
result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
|
||||
write);
|
||||
break;
|
||||
case IOMAP_UNWRITTEN:
|
||||
case IOMAP_HOLE:
|
||||
@ -1442,7 +1476,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
return result;
|
||||
}
|
||||
#else
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
return VM_FAULT_FALLBACK;
|
||||
@ -1452,7 +1486,9 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
/**
|
||||
* dax_iomap_fault - handle a page fault on a DAX file
|
||||
* @vmf: The description of the fault
|
||||
* @ops: iomap ops passed from the file system
|
||||
* @pe_size: Size of the page to fault in
|
||||
* @pfnp: PFN to insert for synchronous faults if fsync is required
|
||||
* @ops: Iomap ops passed from the file system
|
||||
*
|
||||
* When a page fault occurs, filesystems may call this helper in
|
||||
* their fault handler for DAX files. dax_iomap_fault() assumes the caller
|
||||
@ -1460,15 +1496,98 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
* successfully.
|
||||
*/
|
||||
int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
const struct iomap_ops *ops)
|
||||
pfn_t *pfnp, const struct iomap_ops *ops)
|
||||
{
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PTE:
|
||||
return dax_iomap_pte_fault(vmf, ops);
|
||||
return dax_iomap_pte_fault(vmf, pfnp, ops);
|
||||
case PE_SIZE_PMD:
|
||||
return dax_iomap_pmd_fault(vmf, ops);
|
||||
return dax_iomap_pmd_fault(vmf, pfnp, ops);
|
||||
default:
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_iomap_fault);
|
||||
|
||||
/**
|
||||
* dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
|
||||
* @vmf: The description of the fault
|
||||
* @pe_size: Size of entry to be inserted
|
||||
* @pfn: PFN to insert
|
||||
*
|
||||
* This function inserts writeable PTE or PMD entry into page tables for mmaped
|
||||
* DAX file. It takes care of marking corresponding radix tree entry as dirty
|
||||
* as well.
|
||||
*/
|
||||
static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
|
||||
enum page_entry_size pe_size,
|
||||
pfn_t pfn)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
void *entry, **slot;
|
||||
pgoff_t index = vmf->pgoff;
|
||||
int vmf_ret, error;
|
||||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
entry = get_unlocked_mapping_entry(mapping, index, &slot);
|
||||
/* Did we race with someone splitting entry or so? */
|
||||
if (!entry ||
|
||||
(pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
|
||||
(pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
|
||||
put_unlocked_mapping_entry(mapping, index, entry);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
|
||||
VM_FAULT_NOPAGE);
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
|
||||
entry = lock_slot(mapping, slot);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PTE:
|
||||
error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
|
||||
vmf_ret = dax_fault_return(error);
|
||||
break;
|
||||
#ifdef CONFIG_FS_DAX_PMD
|
||||
case PE_SIZE_PMD:
|
||||
vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
|
||||
pfn, true);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
vmf_ret = VM_FAULT_FALLBACK;
|
||||
}
|
||||
put_locked_mapping_entry(mapping, index);
|
||||
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret);
|
||||
return vmf_ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* dax_finish_sync_fault - finish synchronous page fault
|
||||
* @vmf: The description of the fault
|
||||
* @pe_size: Size of entry to be inserted
|
||||
* @pfn: PFN to insert
|
||||
*
|
||||
* This function ensures that the file range touched by the page fault is
|
||||
* stored persistently on the media and handles inserting of appropriate page
|
||||
* table entry.
|
||||
*/
|
||||
int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
pfn_t pfn)
|
||||
{
|
||||
int err;
|
||||
loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
|
||||
size_t len = 0;
|
||||
|
||||
if (pe_size == PE_SIZE_PTE)
|
||||
len = PAGE_SIZE;
|
||||
else if (pe_size == PE_SIZE_PMD)
|
||||
len = PMD_SIZE;
|
||||
else
|
||||
WARN_ON_ONCE(1);
|
||||
err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
|
||||
if (err)
|
||||
return VM_FAULT_SIGBUS;
|
||||
return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
|
||||
|
@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf)
|
||||
}
|
||||
down_read(&ei->dax_sem);
|
||||
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops);
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops);
|
||||
|
||||
up_read(&ei->dax_sem);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/mman.h>
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
@ -297,6 +298,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
|
||||
*/
|
||||
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
|
||||
(vmf->vma->vm_flags & VM_SHARED);
|
||||
pfn_t pfn;
|
||||
|
||||
if (write) {
|
||||
sb_start_pagefault(sb);
|
||||
@ -304,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
|
||||
EXT4_DATA_TRANS_BLOCKS(sb));
|
||||
if (IS_ERR(handle)) {
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
sb_end_pagefault(sb);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
} else {
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
}
|
||||
if (!IS_ERR(handle))
|
||||
result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
|
||||
else
|
||||
result = VM_FAULT_SIGBUS;
|
||||
result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
|
||||
if (write) {
|
||||
if (!IS_ERR(handle))
|
||||
ext4_journal_stop(handle);
|
||||
ext4_journal_stop(handle);
|
||||
/* Handling synchronous page fault? */
|
||||
if (result & VM_FAULT_NEEDDSYNC)
|
||||
result = dax_finish_sync_fault(vmf, pe_size, pfn);
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
sb_end_pagefault(sb);
|
||||
} else {
|
||||
@ -351,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* We don't support synchronous mappings for non-DAX files. At least
|
||||
* until someone comes up with a sensible use case.
|
||||
*/
|
||||
if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(file);
|
||||
if (IS_DAX(file_inode(file))) {
|
||||
vma->vm_ops = &ext4_dax_vm_ops;
|
||||
@ -469,6 +482,7 @@ const struct file_operations ext4_file_operations = {
|
||||
.compat_ioctl = ext4_compat_ioctl,
|
||||
#endif
|
||||
.mmap = ext4_file_mmap,
|
||||
.mmap_supported_flags = MAP_SYNC,
|
||||
.open = ext4_file_open,
|
||||
.release = ext4_release_file,
|
||||
.fsync = ext4_sync_file,
|
||||
|
@ -3384,6 +3384,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
|
||||
return try_to_free_buffers(page);
|
||||
}
|
||||
|
||||
static bool ext4_inode_datasync_dirty(struct inode *inode)
|
||||
{
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
|
||||
if (journal)
|
||||
return !jbd2_transaction_committed(journal,
|
||||
EXT4_I(inode)->i_datasync_tid);
|
||||
/* Any metadata buffers to write? */
|
||||
if (!list_empty(&inode->i_mapping->private_list))
|
||||
return true;
|
||||
return inode->i_state & I_DIRTY_DATASYNC;
|
||||
}
|
||||
|
||||
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
unsigned flags, struct iomap *iomap)
|
||||
{
|
||||
@ -3497,6 +3510,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
}
|
||||
|
||||
iomap->flags = 0;
|
||||
if (ext4_inode_datasync_dirty(inode))
|
||||
iomap->flags |= IOMAP_F_DIRTY;
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->dax_dev = sbi->s_daxdev;
|
||||
iomap->offset = first_block << blkbits;
|
||||
|
@ -737,6 +737,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Return 1 when transaction with given tid has already committed. */
|
||||
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
read_lock(&journal->j_state_lock);
|
||||
if (journal->j_running_transaction &&
|
||||
journal->j_running_transaction->t_tid == tid)
|
||||
ret = 0;
|
||||
if (journal->j_committing_transaction &&
|
||||
journal->j_committing_transaction->t_tid == tid)
|
||||
ret = 0;
|
||||
read_unlock(&journal->j_state_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_transaction_committed);
|
||||
|
||||
/*
|
||||
* When this function returns the transaction corresponding to tid
|
||||
* will be completed. If the transaction is currently running, start
|
||||
|
@ -661,6 +661,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
|
||||
[ilog2(VM_ACCOUNT)] = "ac",
|
||||
[ilog2(VM_NORESERVE)] = "nr",
|
||||
[ilog2(VM_HUGETLB)] = "ht",
|
||||
[ilog2(VM_SYNC)] = "sf",
|
||||
[ilog2(VM_ARCH_1)] = "ar",
|
||||
[ilog2(VM_WIPEONFORK)] = "wf",
|
||||
[ilog2(VM_DONTDUMP)] = "dd",
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mman.h>
|
||||
|
||||
static const struct vm_operations_struct xfs_file_vm_ops;
|
||||
|
||||
@ -1045,7 +1046,11 @@ __xfs_filemap_fault(
|
||||
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
if (IS_DAX(inode)) {
|
||||
ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops);
|
||||
pfn_t pfn;
|
||||
|
||||
ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops);
|
||||
if (ret & VM_FAULT_NEEDDSYNC)
|
||||
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
|
||||
} else {
|
||||
if (write_fault)
|
||||
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
|
||||
@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite(
|
||||
}
|
||||
|
||||
/*
|
||||
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
|
||||
* updates on write faults. In reality, it's need to serialise against
|
||||
* truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
|
||||
* to ensure we serialise the fault barrier in place.
|
||||
* pfn_mkwrite was originally intended to ensure we capture time stamp updates
|
||||
* on write faults. In reality, it needs to serialise against truncate and
|
||||
* prepare memory for writing, so handle it as a standard write fault.
|
||||
*/
|
||||
static int
|
||||
xfs_filemap_pfn_mkwrite(
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
int ret = VM_FAULT_NOPAGE;
|
||||
loff_t size;
|
||||
|
||||
trace_xfs_filemap_pfn_mkwrite(ip);
|
||||
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
|
||||
/* check if the faulting page hasn't raced with truncate */
|
||||
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size)
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
else if (IS_DAX(inode))
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
|
||||
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
return ret;
|
||||
|
||||
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct xfs_file_vm_ops = {
|
||||
@ -1136,6 +1120,13 @@ xfs_file_mmap(
|
||||
struct file *filp,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
/*
|
||||
* We don't support synchronous mappings for non-DAX files. At least
|
||||
* until someone comes up with a sensible use case.
|
||||
*/
|
||||
if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(filp);
|
||||
vma->vm_ops = &xfs_file_vm_ops;
|
||||
if (IS_DAX(file_inode(filp)))
|
||||
@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = {
|
||||
.compat_ioctl = xfs_file_compat_ioctl,
|
||||
#endif
|
||||
.mmap = xfs_file_mmap,
|
||||
.mmap_supported_flags = MAP_SYNC,
|
||||
.open = xfs_file_open,
|
||||
.release = xfs_file_release,
|
||||
.fsync = xfs_file_fsync,
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_inode_item.h"
|
||||
#include "xfs_iomap.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_icache.h"
|
||||
@ -1089,6 +1090,10 @@ xfs_file_iomap_begin(
|
||||
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
|
||||
}
|
||||
|
||||
if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
|
||||
& ~XFS_ILOG_TIMESTAMP))
|
||||
iomap->flags |= IOMAP_F_DIRTY;
|
||||
|
||||
xfs_bmbt_to_iomap(ip, iomap, &imap);
|
||||
|
||||
if (shared)
|
||||
|
@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
|
||||
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
|
||||
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
|
||||
|
||||
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
|
||||
|
||||
TRACE_EVENT(xfs_filemap_fault,
|
||||
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
|
||||
bool write_fault),
|
||||
|
@ -96,7 +96,9 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
||||
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||
const struct iomap_ops *ops);
|
||||
int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
const struct iomap_ops *ops);
|
||||
pfn_t *pfnp, const struct iomap_ops *ops);
|
||||
int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
pfn_t pfn);
|
||||
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
|
||||
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
||||
pgoff_t index);
|
||||
|
@ -1702,6 +1702,7 @@ struct file_operations {
|
||||
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
|
||||
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
|
||||
int (*mmap) (struct file *, struct vm_area_struct *);
|
||||
unsigned long mmap_supported_flags;
|
||||
int (*open) (struct inode *, struct file *);
|
||||
int (*flush) (struct file *, fl_owner_t id);
|
||||
int (*release) (struct inode *, struct file *);
|
||||
|
@ -21,9 +21,13 @@ struct vm_fault;
|
||||
|
||||
/*
|
||||
* Flags for all iomap mappings:
|
||||
*
|
||||
* IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
|
||||
* written data and requires fdatasync to commit them to persistent storage.
|
||||
*/
|
||||
#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */
|
||||
#define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */
|
||||
#define IOMAP_F_DIRTY 0x04 /* uncommitted metadata */
|
||||
|
||||
/*
|
||||
* Flags that only need to be reported for IOMAP_REPORT requests:
|
||||
|
@ -1367,6 +1367,7 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid);
|
||||
int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
|
||||
int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
|
||||
int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
|
||||
int jbd2_transaction_committed(journal_t *journal, tid_t tid);
|
||||
int jbd2_complete_transaction(journal_t *journal, tid_t tid);
|
||||
int jbd2_log_do_checkpoint(journal_t *journal);
|
||||
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
|
||||
|
@ -18,6 +18,18 @@
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
struct badrange_entry {
|
||||
u64 start;
|
||||
u64 length;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct badrange {
|
||||
struct list_head list;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
enum {
|
||||
/* when a dimm supports both PMEM and BLK access a label is required */
|
||||
@ -129,9 +141,12 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
|
||||
|
||||
}
|
||||
|
||||
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
|
||||
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
|
||||
phys_addr_t start, unsigned int len);
|
||||
void badrange_init(struct badrange *badrange);
|
||||
int badrange_add(struct badrange *badrange, u64 addr, u64 length);
|
||||
void badrange_forget(struct badrange *badrange, phys_addr_t start,
|
||||
unsigned int len);
|
||||
int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr,
|
||||
u64 length);
|
||||
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
|
||||
struct nvdimm_bus_descriptor *nfit_desc);
|
||||
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
|
||||
|
@ -199,6 +199,7 @@ extern unsigned int kobjsize(const void *objp);
|
||||
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
|
||||
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
|
||||
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
|
||||
#define VM_SYNC 0x00800000 /* Synchronous page faults */
|
||||
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
|
||||
#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */
|
||||
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
|
||||
@ -1191,8 +1192,9 @@ static inline void clear_page_pfmemalloc(struct page *page)
|
||||
#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */
|
||||
#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */
|
||||
#define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */
|
||||
|
||||
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
|
||||
#define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables
|
||||
* and needs fsync() to complete (for
|
||||
* synchronous page faults in DAX) */
|
||||
|
||||
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \
|
||||
VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
|
||||
@ -1210,7 +1212,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
|
||||
{ VM_FAULT_LOCKED, "LOCKED" }, \
|
||||
{ VM_FAULT_RETRY, "RETRY" }, \
|
||||
{ VM_FAULT_FALLBACK, "FALLBACK" }, \
|
||||
{ VM_FAULT_DONE_COW, "DONE_COW" }
|
||||
{ VM_FAULT_DONE_COW, "DONE_COW" }, \
|
||||
{ VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }
|
||||
|
||||
/* Encode hstate index for a hwpoisoned large page */
|
||||
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
|
||||
|
@ -8,6 +8,48 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <uapi/linux/mman.h>
|
||||
|
||||
/*
|
||||
* Arrange for legacy / undefined architecture specific flags to be
|
||||
* ignored by mmap handling code.
|
||||
*/
|
||||
#ifndef MAP_32BIT
|
||||
#define MAP_32BIT 0
|
||||
#endif
|
||||
#ifndef MAP_HUGE_2MB
|
||||
#define MAP_HUGE_2MB 0
|
||||
#endif
|
||||
#ifndef MAP_HUGE_1GB
|
||||
#define MAP_HUGE_1GB 0
|
||||
#endif
|
||||
#ifndef MAP_UNINITIALIZED
|
||||
#define MAP_UNINITIALIZED 0
|
||||
#endif
|
||||
#ifndef MAP_SYNC
|
||||
#define MAP_SYNC 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The historical set of flags that all mmap implementations implicitly
|
||||
* support when a ->mmap_validate() op is not provided in file_operations.
|
||||
*/
|
||||
#define LEGACY_MAP_MASK (MAP_SHARED \
|
||||
| MAP_PRIVATE \
|
||||
| MAP_FIXED \
|
||||
| MAP_ANONYMOUS \
|
||||
| MAP_DENYWRITE \
|
||||
| MAP_EXECUTABLE \
|
||||
| MAP_UNINITIALIZED \
|
||||
| MAP_GROWSDOWN \
|
||||
| MAP_LOCKED \
|
||||
| MAP_NORESERVE \
|
||||
| MAP_POPULATE \
|
||||
| MAP_NONBLOCK \
|
||||
| MAP_STACK \
|
||||
| MAP_HUGETLB \
|
||||
| MAP_32BIT \
|
||||
| MAP_HUGE_2MB \
|
||||
| MAP_HUGE_1GB)
|
||||
|
||||
extern int sysctl_overcommit_memory;
|
||||
extern int sysctl_overcommit_ratio;
|
||||
extern unsigned long sysctl_overcommit_kbytes;
|
||||
@ -64,8 +106,9 @@ static inline bool arch_validate_prot(unsigned long prot)
|
||||
* ("bit1" and "bit2" must be single bits)
|
||||
*/
|
||||
#define _calc_vm_trans(x, bit1, bit2) \
|
||||
((!(bit1) || !(bit2)) ? 0 : \
|
||||
((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
|
||||
: ((x) & (bit1)) / ((bit1) / (bit2)))
|
||||
: ((x) & (bit1)) / ((bit1) / (bit2))))
|
||||
|
||||
/*
|
||||
* Combine the mmap "prot" argument into "vm_flags" used internally.
|
||||
@ -87,7 +130,8 @@ calc_vm_flag_bits(unsigned long flags)
|
||||
{
|
||||
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
|
||||
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
|
||||
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
|
||||
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
|
||||
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC );
|
||||
}
|
||||
|
||||
unsigned long vm_commit_limit(void);
|
||||
|
@ -149,7 +149,6 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
|
||||
TP_ARGS(inode, vmf, length, pfn, radix_entry))
|
||||
|
||||
DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
|
||||
DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
|
||||
|
||||
DECLARE_EVENT_CLASS(dax_pte_fault_class,
|
||||
TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result),
|
||||
@ -192,6 +191,8 @@ DEFINE_EVENT(dax_pte_fault_class, name, \
|
||||
DEFINE_PTE_FAULT_EVENT(dax_pte_fault);
|
||||
DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done);
|
||||
DEFINE_PTE_FAULT_EVENT(dax_load_hole);
|
||||
DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite_no_entry);
|
||||
DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite);
|
||||
|
||||
TRACE_EVENT(dax_insert_mapping,
|
||||
TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry),
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#define MAP_SHARED 0x01 /* Share changes */
|
||||
#define MAP_PRIVATE 0x02 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x0f /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x10 /* Interpret addr exactly */
|
||||
#define MAP_ANONYMOUS 0x20 /* don't use a file */
|
||||
|
@ -13,6 +13,7 @@
|
||||
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
|
||||
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
|
||||
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
|
||||
#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */
|
||||
|
||||
/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
|
||||
|
||||
|
15
mm/mmap.c
15
mm/mmap.c
@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
|
||||
|
||||
if (file) {
|
||||
struct inode *inode = file_inode(file);
|
||||
unsigned long flags_mask;
|
||||
|
||||
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
|
||||
|
||||
switch (flags & MAP_TYPE) {
|
||||
case MAP_SHARED:
|
||||
/*
|
||||
* Force use of MAP_SHARED_VALIDATE with non-legacy
|
||||
* flags. E.g. MAP_SYNC is dangerous to use with
|
||||
* MAP_SHARED as you don't know which consistency model
|
||||
* you will get. We silently ignore unsupported flags
|
||||
* with MAP_SHARED to preserve backward compatibility.
|
||||
*/
|
||||
flags &= LEGACY_MAP_MASK;
|
||||
/* fall through */
|
||||
case MAP_SHARED_VALIDATE:
|
||||
if (flags & ~flags_mask)
|
||||
return -EOPNOTSUPP;
|
||||
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
|
||||
return -EACCES;
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#define MAP_SHARED 0x01 /* Share changes */
|
||||
#define MAP_PRIVATE 0x02 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x0f /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x10 /* Interpret addr exactly */
|
||||
#define MAP_ANONYMOUS 0x20 /* don't use a file */
|
||||
|
@ -70,6 +70,7 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
|
||||
libnvdimm-y += $(NVDIMM_SRC)/region.o
|
||||
libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
|
||||
libnvdimm-y += $(NVDIMM_SRC)/label.o
|
||||
libnvdimm-y += $(NVDIMM_SRC)/badrange.o
|
||||
libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
|
||||
libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
|
||||
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
|
||||
|
@ -168,8 +168,12 @@ struct nfit_test {
|
||||
spinlock_t lock;
|
||||
} ars_state;
|
||||
struct device *dimm_dev[NUM_DCR];
|
||||
struct badrange badrange;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
static struct workqueue_struct *nfit_wq;
|
||||
|
||||
static struct nfit_test *to_nfit_test(struct device *dev)
|
||||
{
|
||||
struct platform_device *pdev = to_platform_device(dev);
|
||||
@ -234,48 +238,68 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define NFIT_TEST_ARS_RECORDS 4
|
||||
#define NFIT_TEST_CLEAR_ERR_UNIT 256
|
||||
|
||||
static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
|
||||
unsigned int buf_len)
|
||||
{
|
||||
int ars_recs;
|
||||
|
||||
if (buf_len < sizeof(*nd_cmd))
|
||||
return -EINVAL;
|
||||
|
||||
/* for testing, only store up to n records that fit within 4k */
|
||||
ars_recs = SZ_4K / sizeof(struct nd_ars_record);
|
||||
|
||||
nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
|
||||
+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
|
||||
+ ars_recs * sizeof(struct nd_ars_record);
|
||||
nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
|
||||
nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the ars_state to return an ars_result 1 second in the future with
|
||||
* a 4K error range in the middle of the requested address range.
|
||||
*/
|
||||
static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len)
|
||||
static void post_ars_status(struct ars_state *ars_state,
|
||||
struct badrange *badrange, u64 addr, u64 len)
|
||||
{
|
||||
struct nd_cmd_ars_status *ars_status;
|
||||
struct nd_ars_record *ars_record;
|
||||
struct badrange_entry *be;
|
||||
u64 end = addr + len - 1;
|
||||
int i = 0;
|
||||
|
||||
ars_state->deadline = jiffies + 1*HZ;
|
||||
ars_status = ars_state->ars_status;
|
||||
ars_status->status = 0;
|
||||
ars_status->out_length = sizeof(struct nd_cmd_ars_status)
|
||||
+ sizeof(struct nd_ars_record);
|
||||
ars_status->address = addr;
|
||||
ars_status->length = len;
|
||||
ars_status->type = ND_ARS_PERSISTENT;
|
||||
ars_status->num_records = 1;
|
||||
ars_record = &ars_status->records[0];
|
||||
ars_record->handle = 0;
|
||||
ars_record->err_address = addr + len / 2;
|
||||
ars_record->length = SZ_4K;
|
||||
|
||||
spin_lock(&badrange->lock);
|
||||
list_for_each_entry(be, &badrange->list, list) {
|
||||
u64 be_end = be->start + be->length - 1;
|
||||
u64 rstart, rend;
|
||||
|
||||
/* skip entries outside the range */
|
||||
if (be_end < addr || be->start > end)
|
||||
continue;
|
||||
|
||||
rstart = (be->start < addr) ? addr : be->start;
|
||||
rend = (be_end < end) ? be_end : end;
|
||||
ars_record = &ars_status->records[i];
|
||||
ars_record->handle = 0;
|
||||
ars_record->err_address = rstart;
|
||||
ars_record->length = rend - rstart + 1;
|
||||
i++;
|
||||
}
|
||||
spin_unlock(&badrange->lock);
|
||||
ars_status->num_records = i;
|
||||
ars_status->out_length = sizeof(struct nd_cmd_ars_status)
|
||||
+ i * sizeof(struct nd_ars_record);
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_ars_start(struct ars_state *ars_state,
|
||||
static int nfit_test_cmd_ars_start(struct nfit_test *t,
|
||||
struct ars_state *ars_state,
|
||||
struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
|
||||
int *cmd_rc)
|
||||
{
|
||||
@ -289,7 +313,7 @@ static int nfit_test_cmd_ars_start(struct ars_state *ars_state,
|
||||
} else {
|
||||
ars_start->status = 0;
|
||||
ars_start->scrub_time = 1;
|
||||
post_ars_status(ars_state, ars_start->address,
|
||||
post_ars_status(ars_state, &t->badrange, ars_start->address,
|
||||
ars_start->length);
|
||||
*cmd_rc = 0;
|
||||
}
|
||||
@ -320,7 +344,8 @@ static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
|
||||
static int nfit_test_cmd_clear_error(struct nfit_test *t,
|
||||
struct nd_cmd_clear_error *clear_err,
|
||||
unsigned int buf_len, int *cmd_rc)
|
||||
{
|
||||
const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
|
||||
@ -330,18 +355,91 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
|
||||
if ((clear_err->address & mask) || (clear_err->length & mask))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Report 'all clear' success for all commands even though a new
|
||||
* scrub will find errors again. This is enough to have the
|
||||
* error removed from the 'badblocks' tracking in the pmem
|
||||
* driver.
|
||||
*/
|
||||
badrange_forget(&t->badrange, clear_err->address, clear_err->length);
|
||||
clear_err->status = 0;
|
||||
clear_err->cleared = clear_err->length;
|
||||
*cmd_rc = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct region_search_spa {
|
||||
u64 addr;
|
||||
struct nd_region *region;
|
||||
};
|
||||
|
||||
static int is_region_device(struct device *dev)
|
||||
{
|
||||
return !strncmp(dev->kobj.name, "region", 6);
|
||||
}
|
||||
|
||||
static int nfit_test_search_region_spa(struct device *dev, void *data)
|
||||
{
|
||||
struct region_search_spa *ctx = data;
|
||||
struct nd_region *nd_region;
|
||||
resource_size_t ndr_end;
|
||||
|
||||
if (!is_region_device(dev))
|
||||
return 0;
|
||||
|
||||
nd_region = to_nd_region(dev);
|
||||
ndr_end = nd_region->ndr_start + nd_region->ndr_size;
|
||||
|
||||
if (ctx->addr >= nd_region->ndr_start && ctx->addr < ndr_end) {
|
||||
ctx->region = nd_region;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfit_test_search_spa(struct nvdimm_bus *bus,
|
||||
struct nd_cmd_translate_spa *spa)
|
||||
{
|
||||
int ret;
|
||||
struct nd_region *nd_region = NULL;
|
||||
struct nvdimm *nvdimm = NULL;
|
||||
struct nd_mapping *nd_mapping = NULL;
|
||||
struct region_search_spa ctx = {
|
||||
.addr = spa->spa,
|
||||
.region = NULL,
|
||||
};
|
||||
u64 dpa;
|
||||
|
||||
ret = device_for_each_child(&bus->dev, &ctx,
|
||||
nfit_test_search_region_spa);
|
||||
|
||||
if (!ret)
|
||||
return -ENODEV;
|
||||
|
||||
nd_region = ctx.region;
|
||||
|
||||
dpa = ctx.addr - nd_region->ndr_start;
|
||||
|
||||
/*
|
||||
* last dimm is selected for test
|
||||
*/
|
||||
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
|
||||
nvdimm = nd_mapping->nvdimm;
|
||||
|
||||
spa->devices[0].nfit_device_handle = handle[nvdimm->id];
|
||||
spa->num_nvdimms = 1;
|
||||
spa->devices[0].dpa = dpa;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
|
||||
struct nd_cmd_translate_spa *spa, unsigned int buf_len)
|
||||
{
|
||||
if (buf_len < spa->translate_length)
|
||||
return -EINVAL;
|
||||
|
||||
if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms)
|
||||
spa->status = 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
|
||||
{
|
||||
static const struct nd_smart_payload smart_data = {
|
||||
@ -378,6 +476,93 @@ static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uc_error_notify(struct work_struct *work)
|
||||
{
|
||||
struct nfit_test *t = container_of(work, typeof(*t), work);
|
||||
|
||||
__acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR);
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_ars_error_inject(struct nfit_test *t,
|
||||
struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (buf_len != sizeof(*err_inj)) {
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (err_inj->err_inj_spa_range_length <= 0) {
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = badrange_add(&t->badrange, err_inj->err_inj_spa_range_base,
|
||||
err_inj->err_inj_spa_range_length);
|
||||
if (rc < 0)
|
||||
goto err;
|
||||
|
||||
if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
|
||||
queue_work(nfit_wq, &t->work);
|
||||
|
||||
err_inj->status = 0;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
err_inj->status = NFIT_ARS_INJECT_INVALID;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t,
|
||||
struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (buf_len != sizeof(*err_clr)) {
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (err_clr->err_inj_clr_spa_range_length <= 0) {
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base,
|
||||
err_clr->err_inj_clr_spa_range_length);
|
||||
|
||||
err_clr->status = 0;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
err_clr->status = NFIT_ARS_INJECT_INVALID;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
|
||||
struct nd_cmd_ars_err_inj_stat *err_stat,
|
||||
unsigned int buf_len)
|
||||
{
|
||||
struct badrange_entry *be;
|
||||
int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
|
||||
int i = 0;
|
||||
|
||||
err_stat->status = 0;
|
||||
spin_lock(&t->badrange.lock);
|
||||
list_for_each_entry(be, &t->badrange.list, list) {
|
||||
err_stat->record[i].err_inj_stat_spa_range_base = be->start;
|
||||
err_stat->record[i].err_inj_stat_spa_range_length = be->length;
|
||||
i++;
|
||||
if (i > max)
|
||||
break;
|
||||
}
|
||||
spin_unlock(&t->badrange.lock);
|
||||
err_stat->inj_err_rec_count = i;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
|
||||
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
|
||||
unsigned int buf_len, int *cmd_rc)
|
||||
@ -449,6 +634,38 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
|
||||
}
|
||||
} else {
|
||||
struct ars_state *ars_state = &t->ars_state;
|
||||
struct nd_cmd_pkg *call_pkg = buf;
|
||||
|
||||
if (!nd_desc)
|
||||
return -ENOTTY;
|
||||
|
||||
if (cmd == ND_CMD_CALL) {
|
||||
func = call_pkg->nd_command;
|
||||
|
||||
buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
|
||||
buf = (void *) call_pkg->nd_payload;
|
||||
|
||||
switch (func) {
|
||||
case NFIT_CMD_TRANSLATE_SPA:
|
||||
rc = nfit_test_cmd_translate_spa(
|
||||
acpi_desc->nvdimm_bus, buf, buf_len);
|
||||
return rc;
|
||||
case NFIT_CMD_ARS_INJECT_SET:
|
||||
rc = nfit_test_cmd_ars_error_inject(t, buf,
|
||||
buf_len);
|
||||
return rc;
|
||||
case NFIT_CMD_ARS_INJECT_CLEAR:
|
||||
rc = nfit_test_cmd_ars_inject_clear(t, buf,
|
||||
buf_len);
|
||||
return rc;
|
||||
case NFIT_CMD_ARS_INJECT_GET:
|
||||
rc = nfit_test_cmd_ars_inject_status(t, buf,
|
||||
buf_len);
|
||||
return rc;
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
|
||||
return -ENOTTY;
|
||||
@ -458,15 +675,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
|
||||
rc = nfit_test_cmd_ars_cap(buf, buf_len);
|
||||
break;
|
||||
case ND_CMD_ARS_START:
|
||||
rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len,
|
||||
cmd_rc);
|
||||
rc = nfit_test_cmd_ars_start(t, ars_state, buf,
|
||||
buf_len, cmd_rc);
|
||||
break;
|
||||
case ND_CMD_ARS_STATUS:
|
||||
rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
|
||||
cmd_rc);
|
||||
break;
|
||||
case ND_CMD_CLEAR_ERROR:
|
||||
rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc);
|
||||
rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc);
|
||||
break;
|
||||
default:
|
||||
return -ENOTTY;
|
||||
@ -566,10 +783,9 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
|
||||
|
||||
static int ars_state_init(struct device *dev, struct ars_state *ars_state)
|
||||
{
|
||||
/* for testing, only store up to n records that fit within 4k */
|
||||
ars_state->ars_status = devm_kzalloc(dev,
|
||||
sizeof(struct nd_cmd_ars_status)
|
||||
+ sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS,
|
||||
GFP_KERNEL);
|
||||
sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
|
||||
if (!ars_state->ars_status)
|
||||
return -ENOMEM;
|
||||
spin_lock_init(&ars_state->lock);
|
||||
@ -1419,7 +1635,8 @@ static void nfit_test0_setup(struct nfit_test *t)
|
||||
+ i * sizeof(u64);
|
||||
}
|
||||
|
||||
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
|
||||
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
|
||||
SPA0_SIZE);
|
||||
|
||||
acpi_desc = &t->acpi_desc;
|
||||
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
|
||||
@ -1430,7 +1647,12 @@ static void nfit_test0_setup(struct nfit_test *t)
|
||||
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
|
||||
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
|
||||
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
|
||||
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
|
||||
set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
|
||||
set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
|
||||
set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
|
||||
set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
|
||||
set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
|
||||
}
|
||||
|
||||
static void nfit_test1_setup(struct nfit_test *t)
|
||||
@ -1520,7 +1742,8 @@ static void nfit_test1_setup(struct nfit_test *t)
|
||||
dcr->code = NFIT_FIC_BYTE;
|
||||
dcr->windows = 0;
|
||||
|
||||
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE);
|
||||
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
|
||||
SPA2_SIZE);
|
||||
|
||||
acpi_desc = &t->acpi_desc;
|
||||
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
|
||||
@ -1589,6 +1812,7 @@ static int nfit_ctl_test(struct device *dev)
|
||||
unsigned long mask, cmd_size, offset;
|
||||
union {
|
||||
struct nd_cmd_get_config_size cfg_size;
|
||||
struct nd_cmd_clear_error clear_err;
|
||||
struct nd_cmd_ars_status ars_stat;
|
||||
struct nd_cmd_ars_cap ars_cap;
|
||||
char buf[sizeof(struct nd_cmd_ars_status)
|
||||
@ -1613,10 +1837,15 @@ static int nfit_ctl_test(struct device *dev)
|
||||
.cmd_mask = 1UL << ND_CMD_ARS_CAP
|
||||
| 1UL << ND_CMD_ARS_START
|
||||
| 1UL << ND_CMD_ARS_STATUS
|
||||
| 1UL << ND_CMD_CLEAR_ERROR,
|
||||
| 1UL << ND_CMD_CLEAR_ERROR
|
||||
| 1UL << ND_CMD_CALL,
|
||||
.module = THIS_MODULE,
|
||||
.provider_name = "ACPI.NFIT",
|
||||
.ndctl = acpi_nfit_ctl,
|
||||
.bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
|
||||
| 1UL << NFIT_CMD_ARS_INJECT_SET
|
||||
| 1UL << NFIT_CMD_ARS_INJECT_CLEAR
|
||||
| 1UL << NFIT_CMD_ARS_INJECT_GET,
|
||||
},
|
||||
.dev = &adev->dev,
|
||||
};
|
||||
@ -1767,6 +1996,23 @@ static int nfit_ctl_test(struct device *dev)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* test clear error */
|
||||
cmd_size = sizeof(cmds.clear_err);
|
||||
cmds.clear_err = (struct nd_cmd_clear_error) {
|
||||
.length = 512,
|
||||
.cleared = 512,
|
||||
};
|
||||
rc = setup_result(cmds.buf, cmd_size);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
|
||||
cmds.buf, cmd_size, &cmd_rc);
|
||||
if (rc < 0 || cmd_rc) {
|
||||
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
|
||||
__func__, __LINE__, rc, cmd_rc);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1915,6 +2161,10 @@ static __init int nfit_test_init(void)
|
||||
|
||||
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
|
||||
|
||||
nfit_wq = create_singlethread_workqueue("nfit");
|
||||
if (!nfit_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
|
||||
if (IS_ERR(nfit_test_dimm)) {
|
||||
rc = PTR_ERR(nfit_test_dimm);
|
||||
@ -1931,6 +2181,7 @@ static __init int nfit_test_init(void)
|
||||
goto err_register;
|
||||
}
|
||||
INIT_LIST_HEAD(&nfit_test->resources);
|
||||
badrange_init(&nfit_test->badrange);
|
||||
switch (i) {
|
||||
case 0:
|
||||
nfit_test->num_pm = NUM_PM;
|
||||
@ -1966,6 +2217,7 @@ static __init int nfit_test_init(void)
|
||||
goto err_register;
|
||||
|
||||
instances[i] = nfit_test;
|
||||
INIT_WORK(&nfit_test->work, uc_error_notify);
|
||||
}
|
||||
|
||||
rc = platform_driver_register(&nfit_test_driver);
|
||||
@ -1974,6 +2226,7 @@ static __init int nfit_test_init(void)
|
||||
return 0;
|
||||
|
||||
err_register:
|
||||
destroy_workqueue(nfit_wq);
|
||||
for (i = 0; i < NUM_NFITS; i++)
|
||||
if (instances[i])
|
||||
platform_device_unregister(&instances[i]->pdev);
|
||||
@ -1989,6 +2242,8 @@ static __exit void nfit_test_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
flush_workqueue(nfit_wq);
|
||||
destroy_workqueue(nfit_wq);
|
||||
for (i = 0; i < NUM_NFITS; i++)
|
||||
platform_device_unregister(&instances[i]->pdev);
|
||||
platform_driver_unregister(&nfit_test_driver);
|
||||
|
@ -32,6 +32,58 @@ struct nfit_test_resource {
|
||||
void *buf;
|
||||
};
|
||||
|
||||
#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA 2
|
||||
#define NFIT_ARS_INJECT_INVALID 2
|
||||
|
||||
enum err_inj_options {
|
||||
ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
|
||||
};
|
||||
|
||||
/* nfit commands */
|
||||
enum nfit_cmd_num {
|
||||
NFIT_CMD_TRANSLATE_SPA = 5,
|
||||
NFIT_CMD_ARS_INJECT_SET = 7,
|
||||
NFIT_CMD_ARS_INJECT_CLEAR = 8,
|
||||
NFIT_CMD_ARS_INJECT_GET = 9,
|
||||
};
|
||||
|
||||
struct nd_cmd_translate_spa {
|
||||
__u64 spa;
|
||||
__u32 status;
|
||||
__u8 flags;
|
||||
__u8 _reserved[3];
|
||||
__u64 translate_length;
|
||||
__u32 num_nvdimms;
|
||||
struct nd_nvdimm_device {
|
||||
__u32 nfit_device_handle;
|
||||
__u32 _reserved;
|
||||
__u64 dpa;
|
||||
} __packed devices[0];
|
||||
|
||||
} __packed;
|
||||
|
||||
struct nd_cmd_ars_err_inj {
|
||||
__u64 err_inj_spa_range_base;
|
||||
__u64 err_inj_spa_range_length;
|
||||
__u8 err_inj_options;
|
||||
__u32 status;
|
||||
} __packed;
|
||||
|
||||
struct nd_cmd_ars_err_inj_clr {
|
||||
__u64 err_inj_clr_spa_range_base;
|
||||
__u64 err_inj_clr_spa_range_length;
|
||||
__u32 status;
|
||||
} __packed;
|
||||
|
||||
struct nd_cmd_ars_err_inj_stat {
|
||||
__u32 status;
|
||||
__u32 inj_err_rec_count;
|
||||
struct nd_error_stat_query_record {
|
||||
__u64 err_inj_stat_spa_range_base;
|
||||
__u64 err_inj_stat_spa_range_length;
|
||||
} __packed record[0];
|
||||
} __packed;
|
||||
|
||||
union acpi_object;
|
||||
typedef void *acpi_handle;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user