mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
hmm related patches for 5.8
This series adds a selftest for hmm_range_fault() and several of the DEVICE_PRIVATE migration related actions, and another simplification for hmm_range_fault()'s API. - Simplify hmm_range_fault() with a simpler return code, no HMM_PFN_SPECIAL, and no customizable output PFN format - Add a selftest for hmm_range_fault() and DEVICE_PRIVATE related functionality -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAl7VQr8ACgkQOG33FX4g mxrpcg/+O+oZ2p8FDTZi/0BTaU0crUiKwJngmmv78UuvD8nzhOZ0fkhK2lsXn9Uo 70lYbfDUSX2TbReP7y39VArW0v+Bj7wo9/7AZ+R2o5A0ajC6kccjGdnb7uEc3L6v CR+uumRYf/ZNz13cbuRBbYEz477DGnz+3vhBb4FLNTFj9XiNAC61jA1WUI0ep6x3 lDrkhDatqmdBJ+EqZDMq2+UH+lWbkptQT7hPqgEp6o7FqdnySxRd+rT3hALz5wNP fbryfWXM7V1eh7Kxr2mBJJqIkgbdhGLj2yLl1Iz11BbG6u7AT20r23WTvJ7hUCyt 18574twdltZ81gheqqN7KVYYAo+5seMfP14QdthqzzBMo3pOeLG0JMVqQNisDPgn Tf4lWF/GR7ajKxyRbLdvUgRE7pFQ9VMAiP86GoIpBFmSZQQDwcecnoYxg60zsTwR yuf60gopfNsSWNmDqKT3td12PQyFQYHYT6ue1eW6Rb9P+yA++tZaGkvGFn7kHeNV ZeUqsKEy6a9l6cDrFzNmsCcdNZg/qmw9mKFfa/4RRulU5jlskt/e52NiLaLU2rsr 0Tot3j5tMufLLorZPprMI3Z/M9ohVAS5DkX6ttcZDs5v0iGQEUOOnq0cXmwlJQ9I 0CHr2ImjiDr9v2fS+5ixaRNSHfnQWnHxcqq79UZiTjtPW1Daauo= =twev -----END PGP SIGNATURE----- Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma Pull hmm updates from Jason Gunthorpe: "This series adds a selftest for hmm_range_fault() and several of the DEVICE_PRIVATE migration related actions, and another simplification for hmm_range_fault()'s API. 
- Simplify hmm_range_fault() with a simpler return code, no HMM_PFN_SPECIAL, and no customizable output PFN format - Add a selftest for hmm_range_fault() and DEVICE_PRIVATE related functionality" * tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: MAINTAINERS: add HMM selftests mm/hmm/test: add selftests for HMM mm/hmm/test: add selftest driver for HMM mm/hmm: remove the customizable pfn format from hmm_range_fault mm/hmm: remove HMM_PFN_SPECIAL drm/amdgpu: remove dead code after hmm_range_fault() mm/hmm: make hmm_range_fault return 0 or -1
This commit is contained in:
commit
cfa3b8068b
@ -161,7 +161,7 @@ device must complete the update before the driver callback returns.
|
||||
When the device driver wants to populate a range of virtual addresses, it can
|
||||
use::
|
||||
|
||||
long hmm_range_fault(struct hmm_range *range);
|
||||
int hmm_range_fault(struct hmm_range *range);
|
||||
|
||||
It will trigger a page fault on missing or read-only entries if write access is
|
||||
requested (see below). Page faults use the generic mm page fault code path just
|
||||
@ -184,10 +184,7 @@ The usage pattern is::
|
||||
range.notifier = &interval_sub;
|
||||
range.start = ...;
|
||||
range.end = ...;
|
||||
range.pfns = ...;
|
||||
range.flags = ...;
|
||||
range.values = ...;
|
||||
range.pfn_shift = ...;
|
||||
range.hmm_pfns = ...;
|
||||
|
||||
if (!mmget_not_zero(interval_sub->notifier.mm))
|
||||
return -EFAULT;
|
||||
@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specif
|
||||
fault or snapshot policy for the whole range instead of having to set them
|
||||
for each entry in the pfns array.
|
||||
|
||||
For instance, if the device flags for range.flags are::
|
||||
For instance if the device driver wants pages for a range with at least read
|
||||
permission, it sets::
|
||||
|
||||
range.flags[HMM_PFN_VALID] = (1 << 63);
|
||||
range.flags[HMM_PFN_WRITE] = (1 << 62);
|
||||
|
||||
and the device driver wants pages for a range with at least read permission,
|
||||
it sets::
|
||||
|
||||
range->default_flags = (1 << 63);
|
||||
range->default_flags = HMM_PFN_REQ_FAULT;
|
||||
range->pfn_flags_mask = 0;
|
||||
|
||||
and calls hmm_range_fault() as described above. This will fault in all pages
|
||||
@ -246,18 +238,18 @@ in the range with at least read permission.
|
||||
Now let's say the driver wants to do the same except for one page in the range for
|
||||
which it wants to have write permission. Now the driver sets::
|
||||
|
||||
range->default_flags = (1 << 63);
|
||||
range->pfn_flags_mask = (1 << 62);
|
||||
range->pfns[index_of_write] = (1 << 62);
|
||||
range->default_flags = HMM_PFN_REQ_FAULT;
|
||||
range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
|
||||
range->hmm_pfns[index_of_write] = HMM_PFN_REQ_WRITE;
|
||||
|
||||
With this, HMM will fault in all pages with at least read (i.e., valid) and for the
|
||||
address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
|
||||
write permission i.e., if the CPU pte does not have write permission set then HMM
|
||||
will call handle_mm_fault().
|
||||
|
||||
Note that HMM will populate the pfns array with write permission for any page
|
||||
that is mapped with CPU write permission no matter what values are set
|
||||
in default_flags or pfn_flags_mask.
|
||||
After hmm_range_fault completes the flag bits are set to the current state of
|
||||
the page tables, ie HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
|
||||
writable.
|
||||
|
||||
|
||||
Represent and manage device memory from core kernel point of view
|
||||
|
@ -7768,7 +7768,9 @@ L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: Documentation/vm/hmm.rst
|
||||
F: include/linux/hmm*
|
||||
F: lib/test_hmm*
|
||||
F: mm/hmm*
|
||||
F: tools/testing/selftests/vm/*hmm*
|
||||
|
||||
HOST AP DRIVER
|
||||
M: Jouni Malinen <j@w1.fi>
|
||||
|
@ -766,18 +766,6 @@ struct amdgpu_ttm_tt {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DRM_AMDGPU_USERPTR
|
||||
/* flags used by HMM internal, not related to CPU/GPU PTE flags */
|
||||
static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
|
||||
(1 << 0), /* HMM_PFN_VALID */
|
||||
(1 << 1), /* HMM_PFN_WRITE */
|
||||
};
|
||||
|
||||
static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
|
||||
0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
|
||||
0, /* HMM_PFN_NONE */
|
||||
0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
|
||||
* memory and start HMM tracking CPU page table update
|
||||
@ -816,18 +804,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
|
||||
goto out;
|
||||
}
|
||||
range->notifier = &bo->notifier;
|
||||
range->flags = hmm_range_flags;
|
||||
range->values = hmm_range_values;
|
||||
range->pfn_shift = PAGE_SHIFT;
|
||||
range->start = bo->notifier.interval_tree.start;
|
||||
range->end = bo->notifier.interval_tree.last + 1;
|
||||
range->default_flags = hmm_range_flags[HMM_PFN_VALID];
|
||||
range->default_flags = HMM_PFN_REQ_FAULT;
|
||||
if (!amdgpu_ttm_tt_is_readonly(ttm))
|
||||
range->default_flags |= range->flags[HMM_PFN_WRITE];
|
||||
range->default_flags |= HMM_PFN_REQ_WRITE;
|
||||
|
||||
range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!range->pfns)) {
|
||||
range->hmm_pfns = kvmalloc_array(ttm->num_pages,
|
||||
sizeof(*range->hmm_pfns), GFP_KERNEL);
|
||||
if (unlikely(!range->hmm_pfns)) {
|
||||
r = -ENOMEM;
|
||||
goto out_free_ranges;
|
||||
}
|
||||
@ -852,27 +837,23 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
|
||||
down_read(&mm->mmap_sem);
|
||||
r = hmm_range_fault(range);
|
||||
up_read(&mm->mmap_sem);
|
||||
if (unlikely(r <= 0)) {
|
||||
if (unlikely(r)) {
|
||||
/*
|
||||
* FIXME: This timeout should encompass the retry from
|
||||
* mmu_interval_read_retry() as well.
|
||||
*/
|
||||
if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
|
||||
if (r == -EBUSY && !time_after(jiffies, timeout))
|
||||
goto retry;
|
||||
goto out_free_pfns;
|
||||
}
|
||||
|
||||
for (i = 0; i < ttm->num_pages; i++) {
|
||||
/* FIXME: The pages cannot be touched outside the notifier_lock */
|
||||
pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
|
||||
if (unlikely(!pages[i])) {
|
||||
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
|
||||
i, range->pfns[i]);
|
||||
r = -ENOMEM;
|
||||
|
||||
goto out_free_pfns;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Due to default_flags, all pages are HMM_PFN_VALID or
|
||||
* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
|
||||
* the notifier_lock, and mmu_interval_read_retry() must be done first.
|
||||
*/
|
||||
for (i = 0; i < ttm->num_pages; i++)
|
||||
pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
|
||||
|
||||
gtt->range = range;
|
||||
mmput(mm);
|
||||
@ -882,7 +863,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
|
||||
out_unlock:
|
||||
up_read(&mm->mmap_sem);
|
||||
out_free_pfns:
|
||||
kvfree(range->pfns);
|
||||
kvfree(range->hmm_pfns);
|
||||
out_free_ranges:
|
||||
kfree(range);
|
||||
out:
|
||||
@ -907,7 +888,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||
DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
|
||||
gtt->userptr, ttm->num_pages);
|
||||
|
||||
WARN_ONCE(!gtt->range || !gtt->range->pfns,
|
||||
WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
|
||||
"No user pages to check\n");
|
||||
|
||||
if (gtt->range) {
|
||||
@ -917,7 +898,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||
*/
|
||||
r = mmu_interval_read_retry(gtt->range->notifier,
|
||||
gtt->range->notifier_seq);
|
||||
kvfree(gtt->range->pfns);
|
||||
kvfree(gtt->range->hmm_pfns);
|
||||
kfree(gtt->range);
|
||||
gtt->range = NULL;
|
||||
}
|
||||
@ -1008,8 +989,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
|
||||
|
||||
for (i = 0; i < ttm->num_pages; i++) {
|
||||
if (ttm->pages[i] !=
|
||||
hmm_device_entry_to_page(gtt->range,
|
||||
gtt->range->pfns[i]))
|
||||
hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -85,7 +85,7 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page)
|
||||
return container_of(page->pgmap, struct nouveau_dmem, pagemap);
|
||||
}
|
||||
|
||||
static unsigned long nouveau_dmem_page_addr(struct page *page)
|
||||
unsigned long nouveau_dmem_page_addr(struct page *page)
|
||||
{
|
||||
struct nouveau_dmem_chunk *chunk = page->zone_device_data;
|
||||
unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
|
||||
@ -671,28 +671,3 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
|
||||
struct hmm_range *range)
|
||||
{
|
||||
unsigned long i, npages;
|
||||
|
||||
npages = (range->end - range->start) >> PAGE_SHIFT;
|
||||
for (i = 0; i < npages; ++i) {
|
||||
struct page *page;
|
||||
uint64_t addr;
|
||||
|
||||
page = hmm_device_entry_to_page(range, range->pfns[i]);
|
||||
if (page == NULL)
|
||||
continue;
|
||||
|
||||
if (!is_device_private_page(page))
|
||||
continue;
|
||||
|
||||
addr = nouveau_dmem_page_addr(page);
|
||||
range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
|
||||
range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
|
||||
range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
|
||||
}
|
||||
}
|
||||
|
@ -37,9 +37,8 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long start,
|
||||
unsigned long end);
|
||||
unsigned long nouveau_dmem_page_addr(struct page *page);
|
||||
|
||||
void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
|
||||
struct hmm_range *range);
|
||||
#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
|
||||
static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
|
||||
static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}
|
||||
|
@ -369,19 +369,6 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const u64
|
||||
nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = {
|
||||
[HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V,
|
||||
[HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W,
|
||||
};
|
||||
|
||||
static const u64
|
||||
nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = {
|
||||
[HMM_PFN_ERROR ] = ~NVIF_VMM_PFNMAP_V0_V,
|
||||
[HMM_PFN_NONE ] = NVIF_VMM_PFNMAP_V0_NONE,
|
||||
[HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V,
|
||||
};
|
||||
|
||||
/* Issue fault replay for GPU to retry accesses that faulted previously. */
|
||||
static void
|
||||
nouveau_svm_fault_replay(struct nouveau_svm *svm)
|
||||
@ -519,9 +506,45 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
|
||||
.invalidate = nouveau_svm_range_invalidate,
|
||||
};
|
||||
|
||||
/*
* nouveau_hmm_convert_pfn() - encode a range's hmm_pfns[] into ioctl_addr[]
* @drm: not referenced in this body
* @range: supplies start/end (entry count is (end - start) >> PAGE_SHIFT)
* and the hmm_pfns[] array that is read
* @ioctl_addr: output array, one u64 written per page of the range
*
* Entries without HMM_PFN_VALID are written as 0. Valid entries get the
* page address ORed with NVIF_VMM_PFNMAP_V0_V and either
* NVIF_VMM_PFNMAP_V0_VRAM (device private page) or NVIF_VMM_PFNMAP_V0_HOST;
* NVIF_VMM_PFNMAP_V0_W is added when HMM_PFN_WRITE is set.
*/
static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
|
||||
struct hmm_range *range, u64 *ioctl_addr)
|
||||
{
|
||||
unsigned long i, npages;
|
||||
|
||||
/*
|
||||
* The ioctl_addr prepared here is passed through nvif_object_ioctl()
|
||||
* to an eventual DMA map in something like gp100_vmm_pgt_pfn()
|
||||
*
|
||||
* This is all just encoding the internal hmm representation into a
|
||||
* different nouveau internal representation.
|
||||
*/
|
||||
npages = (range->end - range->start) >> PAGE_SHIFT;
|
||||
for (i = 0; i < npages; ++i) {
|
||||
struct page *page;
|
||||
|
||||
if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
|
||||
ioctl_addr[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
page = hmm_pfn_to_page(range->hmm_pfns[i]);
|
||||
if (is_device_private_page(page))
|
||||
ioctl_addr[i] = nouveau_dmem_page_addr(page) |
|
||||
NVIF_VMM_PFNMAP_V0_V |
|
||||
NVIF_VMM_PFNMAP_V0_VRAM;
|
||||
else
|
||||
ioctl_addr[i] = page_to_phys(page) |
|
||||
NVIF_VMM_PFNMAP_V0_V |
|
||||
NVIF_VMM_PFNMAP_V0_HOST;
|
||||
if (range->hmm_pfns[i] & HMM_PFN_WRITE)
|
||||
ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
|
||||
}
|
||||
}
|
||||
|
||||
static int nouveau_range_fault(struct nouveau_svmm *svmm,
|
||||
struct nouveau_drm *drm, void *data, u32 size,
|
||||
u64 *pfns, struct svm_notifier *notifier)
|
||||
unsigned long hmm_pfns[], u64 *ioctl_addr,
|
||||
struct svm_notifier *notifier)
|
||||
{
|
||||
unsigned long timeout =
|
||||
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
@ -530,26 +553,27 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
|
||||
.notifier = &notifier->notifier,
|
||||
.start = notifier->notifier.interval_tree.start,
|
||||
.end = notifier->notifier.interval_tree.last + 1,
|
||||
.pfns = pfns,
|
||||
.flags = nouveau_svm_pfn_flags,
|
||||
.values = nouveau_svm_pfn_values,
|
||||
.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
|
||||
.pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
|
||||
.hmm_pfns = hmm_pfns,
|
||||
};
|
||||
struct mm_struct *mm = notifier->notifier.mm;
|
||||
long ret;
|
||||
int ret;
|
||||
|
||||
while (true) {
|
||||
if (time_after(jiffies, timeout))
|
||||
return -EBUSY;
|
||||
|
||||
range.notifier_seq = mmu_interval_read_begin(range.notifier);
|
||||
range.default_flags = 0;
|
||||
range.pfn_flags_mask = -1UL;
|
||||
down_read(&mm->mmap_sem);
|
||||
ret = hmm_range_fault(&range);
|
||||
up_read(&mm->mmap_sem);
|
||||
if (ret <= 0) {
|
||||
if (ret == 0 || ret == -EBUSY)
|
||||
if (ret) {
|
||||
/*
|
||||
* FIXME: the input PFN_REQ flags are destroyed on
|
||||
* -EBUSY, we need to regenerate them, also for the
|
||||
* other continue below
|
||||
*/
|
||||
if (ret == -EBUSY)
|
||||
continue;
|
||||
return ret;
|
||||
}
|
||||
@ -563,7 +587,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
|
||||
break;
|
||||
}
|
||||
|
||||
nouveau_dmem_convert_pfn(drm, &range);
|
||||
nouveau_hmm_convert_pfn(drm, &range, ioctl_addr);
|
||||
|
||||
svmm->vmm->vmm.object.client->super = true;
|
||||
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
|
||||
@ -590,6 +614,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
|
||||
} i;
|
||||
u64 phys[16];
|
||||
} args;
|
||||
unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
|
||||
struct vm_area_struct *vma;
|
||||
u64 inst, start, limit;
|
||||
int fi, fn, pi, fill;
|
||||
@ -705,12 +730,17 @@ nouveau_svm_fault(struct nvif_notify *notify)
|
||||
* access flags.
|
||||
*XXX: atomic?
|
||||
*/
|
||||
if (buffer->fault[fn]->access != 0 /* READ. */ &&
|
||||
buffer->fault[fn]->access != 3 /* PREFETCH. */) {
|
||||
args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V |
|
||||
NVIF_VMM_PFNMAP_V0_W;
|
||||
} else {
|
||||
args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V;
|
||||
switch (buffer->fault[fn]->access) {
|
||||
case 0: /* READ. */
|
||||
hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
|
||||
break;
|
||||
case 3: /* PREFETCH. */
|
||||
hmm_pfns[pi++] = 0;
|
||||
break;
|
||||
default:
|
||||
hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
|
||||
HMM_PFN_REQ_WRITE;
|
||||
break;
|
||||
}
|
||||
args.i.p.size = pi << PAGE_SHIFT;
|
||||
|
||||
@ -738,7 +768,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
|
||||
fill = (buffer->fault[fn ]->addr -
|
||||
buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
|
||||
while (--fill)
|
||||
args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE;
|
||||
hmm_pfns[pi++] = 0;
|
||||
}
|
||||
|
||||
SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
|
||||
@ -754,7 +784,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
|
||||
ret = nouveau_range_fault(
|
||||
svmm, svm->drm, &args,
|
||||
sizeof(args.i) + pi * sizeof(args.phys[0]),
|
||||
args.phys, &notifier);
|
||||
hmm_pfns, args.phys, &notifier);
|
||||
mmu_interval_notifier_remove(&notifier.notifier);
|
||||
}
|
||||
mmput(mm);
|
||||
|
@ -19,51 +19,47 @@
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
/*
|
||||
* hmm_pfn_flag_e - HMM flag enums
|
||||
* On output:
|
||||
* 0 - The page is faultable and a future call with
|
||||
* HMM_PFN_REQ_FAULT could succeed.
|
||||
* HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
|
||||
* least readable. If dev_private_owner is !NULL then this could
|
||||
* point at a DEVICE_PRIVATE page.
|
||||
* HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
|
||||
* HMM_PFN_ERROR - accessing the pfn is impossible and the device should
|
||||
* fail. ie poisoned memory, special pages, no vma, etc
|
||||
*
|
||||
* Flags:
|
||||
* HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
|
||||
* HMM_PFN_WRITE: CPU page table has write permission set
|
||||
*
|
||||
* The driver provides a flags array for mapping page protections to device
|
||||
* PTE bits. If the driver valid bit for an entry is bit 3,
|
||||
* i.e., (entry & (1 << 3)), then the driver must provide
|
||||
* an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
|
||||
* Same logic apply to all flags. This is the same idea as vm_page_prot in vma
|
||||
* except that this is per device driver rather than per architecture.
|
||||
* On input:
|
||||
* 0 - Return the current state of the page, do not fault it.
|
||||
* HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
|
||||
* will fail
|
||||
* HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
|
||||
* will fail. Must be combined with HMM_PFN_REQ_FAULT.
|
||||
*/
|
||||
enum hmm_pfn_flag_e {
|
||||
HMM_PFN_VALID = 0,
|
||||
HMM_PFN_WRITE,
|
||||
HMM_PFN_FLAG_MAX
|
||||
enum hmm_pfn_flags {
|
||||
/* Output flags */
|
||||
HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
|
||||
HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
|
||||
HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
|
||||
|
||||
/* Input flags */
|
||||
HMM_PFN_REQ_FAULT = HMM_PFN_VALID,
|
||||
HMM_PFN_REQ_WRITE = HMM_PFN_WRITE,
|
||||
|
||||
HMM_PFN_FLAGS = HMM_PFN_VALID | HMM_PFN_WRITE | HMM_PFN_ERROR,
|
||||
};
|
||||
|
||||
/*
|
||||
* hmm_pfn_value_e - HMM pfn special value
|
||||
* hmm_pfn_to_page() - return struct page pointed to by a device entry
|
||||
*
|
||||
* Flags:
|
||||
* HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
|
||||
* HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
|
||||
* HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
|
||||
* result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
|
||||
* be mirrored by a device, because the entry will never have HMM_PFN_VALID
|
||||
* set and the pfn value is undefined.
|
||||
*
|
||||
* Driver provides values for none entry, error entry, and special entry.
|
||||
* Driver can alias (i.e., use same value) error and special, but
|
||||
* it should not alias none with error or special.
|
||||
*
|
||||
* HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
|
||||
* hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
|
||||
* hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
|
||||
* hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
|
||||
* This must be called under the caller 'user_lock' after a successful
|
||||
* mmu_interval_read_begin(). The caller must have tested for HMM_PFN_VALID
|
||||
* already.
|
||||
*/
|
||||
enum hmm_pfn_value_e {
|
||||
HMM_PFN_ERROR,
|
||||
HMM_PFN_NONE,
|
||||
HMM_PFN_SPECIAL,
|
||||
HMM_PFN_VALUE_MAX
|
||||
};
|
||||
static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
|
||||
{
|
||||
/* Mask off the HMM_PFN_FLAGS bits so only the pfn is converted; HMM_PFN_VALID is not checked here. */
return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
|
||||
}
|
||||
|
||||
/*
|
||||
* struct hmm_range - track invalidation lock on virtual address range
|
||||
@ -72,12 +68,9 @@ enum hmm_pfn_value_e {
|
||||
* @notifier_seq: result of mmu_interval_read_begin()
|
||||
* @start: range virtual start address (inclusive)
|
||||
* @end: range virtual end address (exclusive)
|
||||
* @pfns: array of pfns (big enough for the range)
|
||||
* @flags: pfn flags to match device driver page table
|
||||
* @values: pfn value for some special case (none, special, error, ...)
|
||||
* @hmm_pfns: array of pfns (big enough for the range)
|
||||
* @default_flags: default flags for the range (write, read, ... see hmm doc)
|
||||
* @pfn_flags_mask: mask applied to the per-entry pfn flags; set to 0 so that only default_flags matter
|
||||
* @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
|
||||
* @dev_private_owner: owner of device private pages
|
||||
*/
|
||||
struct hmm_range {
|
||||
@ -85,42 +78,16 @@ struct hmm_range {
|
||||
unsigned long notifier_seq;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
uint64_t *pfns;
|
||||
const uint64_t *flags;
|
||||
const uint64_t *values;
|
||||
uint64_t default_flags;
|
||||
uint64_t pfn_flags_mask;
|
||||
uint8_t pfn_shift;
|
||||
unsigned long *hmm_pfns;
|
||||
unsigned long default_flags;
|
||||
unsigned long pfn_flags_mask;
|
||||
void *dev_private_owner;
|
||||
};
|
||||
|
||||
/*
|
||||
* hmm_device_entry_to_page() - return struct page pointed to by a device entry
|
||||
* @range: range use to decode device entry value
|
||||
* @entry: device entry value to get corresponding struct page from
|
||||
* Return: struct page pointer if entry is a valid, NULL otherwise
|
||||
*
|
||||
* If the device entry is valid (ie valid flag set) then return the struct page
|
||||
* matching the entry value. Otherwise return NULL.
|
||||
*/
|
||||
static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range,
|
||||
uint64_t entry)
|
||||
{
|
||||
if (entry == range->values[HMM_PFN_NONE])
|
||||
return NULL;
|
||||
if (entry == range->values[HMM_PFN_ERROR])
|
||||
return NULL;
|
||||
if (entry == range->values[HMM_PFN_SPECIAL])
|
||||
return NULL;
|
||||
if (!(entry & range->flags[HMM_PFN_VALID]))
|
||||
return NULL;
|
||||
return pfn_to_page(entry >> range->pfn_shift);
|
||||
}
|
||||
|
||||
/*
|
||||
* Please see Documentation/vm/hmm.rst for how to use the range API.
|
||||
*/
|
||||
long hmm_range_fault(struct hmm_range *range);
|
||||
int hmm_range_fault(struct hmm_range *range);
|
||||
|
||||
/*
|
||||
* HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
|
||||
|
@ -2218,6 +2218,19 @@ config TEST_MEMINIT
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config TEST_HMM
|
||||
tristate "Test HMM (Heterogeneous Memory Management)"
|
||||
depends on TRANSPARENT_HUGEPAGE
|
||||
depends on DEVICE_PRIVATE
|
||||
select HMM_MIRROR
|
||||
select MMU_NOTIFIER
|
||||
help
|
||||
This is a pseudo device driver solely for testing HMM.
|
||||
Say M here if you want to build the HMM test module.
|
||||
Doing so will allow you to run tools/testing/selftests/vm/hmm-tests.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endif # RUNTIME_TESTING_MENU
|
||||
|
||||
config MEMTEST
|
||||
|
@ -92,6 +92,7 @@ obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
|
||||
obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
|
||||
obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
|
||||
obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
|
||||
obj-$(CONFIG_TEST_HMM) += test_hmm.o
|
||||
|
||||
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
|
||||
|
||||
|
1164
lib/test_hmm.c
Normal file
1164
lib/test_hmm.c
Normal file
File diff suppressed because it is too large
Load Diff
59
lib/test_hmm_uapi.h
Normal file
59
lib/test_hmm_uapi.h
Normal file
@ -0,0 +1,59 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
* This is a module to test the HMM (Heterogeneous Memory Management) API
|
||||
* of the kernel. It allows a userspace program to expose its entire address
|
||||
* space through the HMM test module device file.
|
||||
*/
|
||||
#ifndef _LIB_TEST_HMM_UAPI_H
|
||||
#define _LIB_TEST_HMM_UAPI_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
/*
|
||||
* Structure to pass to the HMM test driver to mimic a device accessing
|
||||
* system memory and ZONE_DEVICE private memory through device page tables.
|
||||
*
|
||||
* @addr: (in) user address the device will read/write
|
||||
* @ptr: (in) user address where device data is copied to/from
|
||||
* @npages: (in) number of pages to read/write
|
||||
* @cpages: (out) number of pages copied
|
||||
* @faults: (out) number of device page faults seen
|
||||
*/
|
||||
struct hmm_dmirror_cmd {
|
||||
__u64 addr;
|
||||
__u64 ptr;
|
||||
__u64 npages;
|
||||
__u64 cpages;
|
||||
__u64 faults;
|
||||
};
|
||||
|
||||
/* Expose the address space of the calling process through hmm device file */
|
||||
#define HMM_DMIRROR_READ _IOWR('H', 0x00, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_WRITE _IOWR('H', 0x01, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_MIGRATE _IOWR('H', 0x02, struct hmm_dmirror_cmd)
|
||||
#define HMM_DMIRROR_SNAPSHOT _IOWR('H', 0x03, struct hmm_dmirror_cmd)
|
||||
|
||||
/*
|
||||
* Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
|
||||
* HMM_DMIRROR_PROT_ERROR: no valid mirror PTE for this page
|
||||
* HMM_DMIRROR_PROT_NONE: unpopulated PTE or PTE with no access
|
||||
* HMM_DMIRROR_PROT_READ: read-only PTE
|
||||
* HMM_DMIRROR_PROT_WRITE: read/write PTE
|
||||
* HMM_DMIRROR_PROT_ZERO: special read-only zero page
|
||||
* HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL: Migrated device private page on the
|
||||
* device the ioctl() is made
|
||||
* HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
|
||||
* other device
|
||||
*/
|
||||
enum {
|
||||
HMM_DMIRROR_PROT_ERROR = 0xFF,
|
||||
HMM_DMIRROR_PROT_NONE = 0x00,
|
||||
HMM_DMIRROR_PROT_READ = 0x01,
|
||||
HMM_DMIRROR_PROT_WRITE = 0x02,
|
||||
HMM_DMIRROR_PROT_ZERO = 0x10,
|
||||
HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20,
|
||||
HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
|
||||
};
|
||||
|
||||
#endif /* _LIB_TEST_HMM_UAPI_H */
|
185
mm/hmm.c
185
mm/hmm.c
@ -37,28 +37,13 @@ enum {
|
||||
HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
|
||||
};
|
||||
|
||||
/*
|
||||
* hmm_device_entry_from_pfn() - create a valid device entry value from pfn
|
||||
* @range: range use to encode HMM pfn value
|
||||
* @pfn: pfn value for which to create the device entry
|
||||
* Return: valid device entry for the pfn
|
||||
*/
|
||||
static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
|
||||
unsigned long pfn)
|
||||
{
|
||||
return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID];
|
||||
}
|
||||
|
||||
static int hmm_pfns_fill(unsigned long addr, unsigned long end,
|
||||
struct hmm_range *range, enum hmm_pfn_value_e value)
|
||||
struct hmm_range *range, unsigned long cpu_flags)
|
||||
{
|
||||
uint64_t *pfns = range->pfns;
|
||||
unsigned long i;
|
||||
unsigned long i = (addr - range->start) >> PAGE_SHIFT;
|
||||
|
||||
i = (addr - range->start) >> PAGE_SHIFT;
|
||||
for (; addr < end; addr += PAGE_SIZE, i++)
|
||||
pfns[i] = range->values[value];
|
||||
|
||||
range->hmm_pfns[i] = cpu_flags;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -96,7 +81,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
|
||||
}
|
||||
|
||||
static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
|
||||
uint64_t pfns, uint64_t cpu_flags)
|
||||
unsigned long pfn_req_flags,
|
||||
unsigned long cpu_flags)
|
||||
{
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
|
||||
@ -110,27 +96,28 @@ static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
|
||||
* waste to have the user pre-fill the pfn arrays with a default
|
||||
* flags value.
|
||||
*/
|
||||
pfns = (pfns & range->pfn_flags_mask) | range->default_flags;
|
||||
pfn_req_flags &= range->pfn_flags_mask;
|
||||
pfn_req_flags |= range->default_flags;
|
||||
|
||||
/* We aren't asked to do anything ... */
|
||||
if (!(pfns & range->flags[HMM_PFN_VALID]))
|
||||
if (!(pfn_req_flags & HMM_PFN_REQ_FAULT))
|
||||
return 0;
|
||||
|
||||
/* Need to write fault ? */
|
||||
if ((pfns & range->flags[HMM_PFN_WRITE]) &&
|
||||
!(cpu_flags & range->flags[HMM_PFN_WRITE]))
|
||||
if ((pfn_req_flags & HMM_PFN_REQ_WRITE) &&
|
||||
!(cpu_flags & HMM_PFN_WRITE))
|
||||
return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT;
|
||||
|
||||
/* If CPU page table is not valid then we need to fault */
|
||||
if (!(cpu_flags & range->flags[HMM_PFN_VALID]))
|
||||
if (!(cpu_flags & HMM_PFN_VALID))
|
||||
return HMM_NEED_FAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
|
||||
const uint64_t *pfns, unsigned long npages,
|
||||
uint64_t cpu_flags)
|
||||
const unsigned long hmm_pfns[], unsigned long npages,
|
||||
unsigned long cpu_flags)
|
||||
{
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
unsigned int required_fault = 0;
|
||||
@ -142,12 +129,12 @@ hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
|
||||
* hmm_pte_need_fault() will always return 0.
|
||||
*/
|
||||
if (!((range->default_flags | range->pfn_flags_mask) &
|
||||
range->flags[HMM_PFN_VALID]))
|
||||
HMM_PFN_REQ_FAULT))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < npages; ++i) {
|
||||
required_fault |=
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags);
|
||||
required_fault |= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i],
|
||||
cpu_flags);
|
||||
if (required_fault == HMM_NEED_ALL_BITS)
|
||||
return required_fault;
|
||||
}
|
||||
@ -161,12 +148,13 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
unsigned int required_fault;
|
||||
unsigned long i, npages;
|
||||
uint64_t *pfns;
|
||||
unsigned long *hmm_pfns;
|
||||
|
||||
i = (addr - range->start) >> PAGE_SHIFT;
|
||||
npages = (end - addr) >> PAGE_SHIFT;
|
||||
pfns = &range->pfns[i];
|
||||
required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0);
|
||||
hmm_pfns = &range->hmm_pfns[i];
|
||||
required_fault =
|
||||
hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0);
|
||||
if (!walk->vma) {
|
||||
if (required_fault)
|
||||
return -EFAULT;
|
||||
@ -174,46 +162,44 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
|
||||
}
|
||||
if (required_fault)
|
||||
return hmm_vma_fault(addr, end, required_fault, walk);
|
||||
hmm_vma_walk->last = addr;
|
||||
return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE);
|
||||
return hmm_pfns_fill(addr, end, range, 0);
|
||||
}
|
||||
|
||||
static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
|
||||
static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
|
||||
pmd_t pmd)
|
||||
{
|
||||
if (pmd_protnone(pmd))
|
||||
return 0;
|
||||
return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
|
||||
range->flags[HMM_PFN_WRITE] :
|
||||
range->flags[HMM_PFN_VALID];
|
||||
return pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
|
||||
unsigned long end, uint64_t *pfns, pmd_t pmd)
|
||||
unsigned long end, unsigned long hmm_pfns[],
|
||||
pmd_t pmd)
|
||||
{
|
||||
struct hmm_vma_walk *hmm_vma_walk = walk->private;
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
unsigned long pfn, npages, i;
|
||||
unsigned int required_fault;
|
||||
uint64_t cpu_flags;
|
||||
unsigned long cpu_flags;
|
||||
|
||||
npages = (end - addr) >> PAGE_SHIFT;
|
||||
cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
|
||||
required_fault =
|
||||
hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags);
|
||||
hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags);
|
||||
if (required_fault)
|
||||
return hmm_vma_fault(addr, end, required_fault, walk);
|
||||
|
||||
pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
|
||||
pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
|
||||
hmm_vma_walk->last = end;
|
||||
hmm_pfns[i] = pfn | cpu_flags;
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
/* stub to allow the code below to compile */
|
||||
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
|
||||
unsigned long end, uint64_t *pfns, pmd_t pmd);
|
||||
unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
static inline bool hmm_is_device_private_entry(struct hmm_range *range,
|
||||
@ -224,31 +210,31 @@ static inline bool hmm_is_device_private_entry(struct hmm_range *range,
|
||||
range->dev_private_owner;
|
||||
}
|
||||
|
||||
static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
|
||||
static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
|
||||
pte_t pte)
|
||||
{
|
||||
if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
|
||||
return 0;
|
||||
return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
|
||||
range->flags[HMM_PFN_WRITE] :
|
||||
range->flags[HMM_PFN_VALID];
|
||||
return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
|
||||
}
|
||||
|
||||
static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
||||
unsigned long end, pmd_t *pmdp, pte_t *ptep,
|
||||
uint64_t *pfn)
|
||||
unsigned long *hmm_pfn)
|
||||
{
|
||||
struct hmm_vma_walk *hmm_vma_walk = walk->private;
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
unsigned int required_fault;
|
||||
uint64_t cpu_flags;
|
||||
unsigned long cpu_flags;
|
||||
pte_t pte = *ptep;
|
||||
uint64_t orig_pfn = *pfn;
|
||||
uint64_t pfn_req_flags = *hmm_pfn;
|
||||
|
||||
if (pte_none(pte)) {
|
||||
required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
|
||||
if (required_fault)
|
||||
goto fault;
|
||||
*pfn = range->values[HMM_PFN_NONE];
|
||||
*hmm_pfn = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -260,17 +246,18 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
||||
* the PFN even if not present.
|
||||
*/
|
||||
if (hmm_is_device_private_entry(range, entry)) {
|
||||
*pfn = hmm_device_entry_from_pfn(range,
|
||||
device_private_entry_to_pfn(entry));
|
||||
*pfn |= range->flags[HMM_PFN_VALID];
|
||||
cpu_flags = HMM_PFN_VALID;
|
||||
if (is_write_device_private_entry(entry))
|
||||
*pfn |= range->flags[HMM_PFN_WRITE];
|
||||
cpu_flags |= HMM_PFN_WRITE;
|
||||
*hmm_pfn = device_private_entry_to_pfn(entry) |
|
||||
cpu_flags;
|
||||
return 0;
|
||||
}
|
||||
|
||||
required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
|
||||
if (!required_fault) {
|
||||
*pfn = range->values[HMM_PFN_NONE];
|
||||
*hmm_pfn = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -290,7 +277,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
||||
}
|
||||
|
||||
cpu_flags = pte_to_hmm_pfn_flags(range, pte);
|
||||
required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
|
||||
if (required_fault)
|
||||
goto fault;
|
||||
|
||||
@ -299,15 +287,15 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
|
||||
* fall through and treat it like a normal page.
|
||||
*/
|
||||
if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
|
||||
if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) {
|
||||
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
|
||||
pte_unmap(ptep);
|
||||
return -EFAULT;
|
||||
}
|
||||
*pfn = range->values[HMM_PFN_SPECIAL];
|
||||
*hmm_pfn = HMM_PFN_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*pfn = hmm_device_entry_from_pfn(range, pte_pfn(pte)) | cpu_flags;
|
||||
*hmm_pfn = pte_pfn(pte) | cpu_flags;
|
||||
return 0;
|
||||
|
||||
fault:
|
||||
@ -323,7 +311,8 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
|
||||
{
|
||||
struct hmm_vma_walk *hmm_vma_walk = walk->private;
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
|
||||
unsigned long *hmm_pfns =
|
||||
&range->hmm_pfns[(start - range->start) >> PAGE_SHIFT];
|
||||
unsigned long npages = (end - start) >> PAGE_SHIFT;
|
||||
unsigned long addr = start;
|
||||
pte_t *ptep;
|
||||
@ -335,16 +324,16 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
|
||||
return hmm_vma_walk_hole(start, end, -1, walk);
|
||||
|
||||
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
|
||||
if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) {
|
||||
if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) {
|
||||
hmm_vma_walk->last = addr;
|
||||
pmd_migration_entry_wait(walk->mm, pmdp);
|
||||
return -EBUSY;
|
||||
}
|
||||
return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
|
||||
return hmm_pfns_fill(start, end, range, 0);
|
||||
}
|
||||
|
||||
if (!pmd_present(pmd)) {
|
||||
if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
|
||||
if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
|
||||
return -EFAULT;
|
||||
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
|
||||
}
|
||||
@ -364,7 +353,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
|
||||
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
|
||||
goto again;
|
||||
|
||||
return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
|
||||
return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -374,37 +363,33 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
|
||||
* recover.
|
||||
*/
|
||||
if (pmd_bad(pmd)) {
|
||||
if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
|
||||
if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
|
||||
return -EFAULT;
|
||||
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
|
||||
}
|
||||
|
||||
ptep = pte_offset_map(pmdp, addr);
|
||||
for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
|
||||
for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
|
||||
int r;
|
||||
|
||||
r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
|
||||
r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns);
|
||||
if (r) {
|
||||
/* hmm_vma_handle_pte() did pte_unmap() */
|
||||
hmm_vma_walk->last = addr;
|
||||
return r;
|
||||
}
|
||||
}
|
||||
pte_unmap(ptep - 1);
|
||||
|
||||
hmm_vma_walk->last = addr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
|
||||
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
|
||||
static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
|
||||
static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range,
|
||||
pud_t pud)
|
||||
{
|
||||
if (!pud_present(pud))
|
||||
return 0;
|
||||
return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
|
||||
range->flags[HMM_PFN_WRITE] :
|
||||
range->flags[HMM_PFN_VALID];
|
||||
return pud_write(pud) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
|
||||
}
|
||||
|
||||
static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||
@ -432,7 +417,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||
if (pud_huge(pud) && pud_devmap(pud)) {
|
||||
unsigned long i, npages, pfn;
|
||||
unsigned int required_fault;
|
||||
uint64_t *pfns, cpu_flags;
|
||||
unsigned long *hmm_pfns;
|
||||
unsigned long cpu_flags;
|
||||
|
||||
if (!pud_present(pud)) {
|
||||
spin_unlock(ptl);
|
||||
@ -441,10 +427,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||
|
||||
i = (addr - range->start) >> PAGE_SHIFT;
|
||||
npages = (end - addr) >> PAGE_SHIFT;
|
||||
pfns = &range->pfns[i];
|
||||
hmm_pfns = &range->hmm_pfns[i];
|
||||
|
||||
cpu_flags = pud_to_hmm_pfn_flags(range, pud);
|
||||
required_fault = hmm_range_need_fault(hmm_vma_walk, pfns,
|
||||
required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns,
|
||||
npages, cpu_flags);
|
||||
if (required_fault) {
|
||||
spin_unlock(ptl);
|
||||
@ -453,9 +439,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
|
||||
|
||||
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
for (i = 0; i < npages; ++i, ++pfn)
|
||||
pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
|
||||
cpu_flags;
|
||||
hmm_vma_walk->last = end;
|
||||
hmm_pfns[i] = pfn | cpu_flags;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@ -479,8 +463,9 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
|
||||
struct hmm_vma_walk *hmm_vma_walk = walk->private;
|
||||
struct hmm_range *range = hmm_vma_walk->range;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
uint64_t orig_pfn, cpu_flags;
|
||||
unsigned int required_fault;
|
||||
unsigned long pfn_req_flags;
|
||||
unsigned long cpu_flags;
|
||||
spinlock_t *ptl;
|
||||
pte_t entry;
|
||||
|
||||
@ -488,9 +473,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
|
||||
entry = huge_ptep_get(pte);
|
||||
|
||||
i = (start - range->start) >> PAGE_SHIFT;
|
||||
orig_pfn = range->pfns[i];
|
||||
pfn_req_flags = range->hmm_pfns[i];
|
||||
cpu_flags = pte_to_hmm_pfn_flags(range, entry);
|
||||
required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
|
||||
required_fault =
|
||||
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
|
||||
if (required_fault) {
|
||||
spin_unlock(ptl);
|
||||
return hmm_vma_fault(addr, end, required_fault, walk);
|
||||
@ -498,9 +484,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
|
||||
|
||||
pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
|
||||
for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
|
||||
range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
|
||||
cpu_flags;
|
||||
hmm_vma_walk->last = end;
|
||||
range->hmm_pfns[i] = pfn | cpu_flags;
|
||||
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
@ -531,13 +516,12 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
|
||||
* failure.
|
||||
*/
|
||||
if (hmm_range_need_fault(hmm_vma_walk,
|
||||
range->pfns +
|
||||
range->hmm_pfns +
|
||||
((start - range->start) >> PAGE_SHIFT),
|
||||
(end - start) >> PAGE_SHIFT, 0))
|
||||
return -EFAULT;
|
||||
|
||||
hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
|
||||
hmm_vma_walk->last = end;
|
||||
|
||||
/* Skip this vma and continue processing the next vma. */
|
||||
return 1;
|
||||
@ -555,9 +539,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
|
||||
* hmm_range_fault - try to fault some address in a virtual address range
|
||||
* @range: argument structure
|
||||
*
|
||||
* Return: the number of valid pages in range->pfns[] (from range start
|
||||
* address), which may be zero. On error one of the following status codes
|
||||
* can be returned:
|
||||
* Returns 0 on success or one of the following error codes:
|
||||
*
|
||||
* -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma
|
||||
* (e.g., device file vma).
|
||||
@ -572,7 +554,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
|
||||
* This is similar to get_user_pages(), except that it can read the page tables
|
||||
* without mutating them (ie causing faults).
|
||||
*/
|
||||
long hmm_range_fault(struct hmm_range *range)
|
||||
int hmm_range_fault(struct hmm_range *range)
|
||||
{
|
||||
struct hmm_vma_walk hmm_vma_walk = {
|
||||
.range = range,
|
||||
@ -590,10 +572,13 @@ long hmm_range_fault(struct hmm_range *range)
|
||||
return -EBUSY;
|
||||
ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
|
||||
&hmm_walk_ops, &hmm_vma_walk);
|
||||
/*
|
||||
* When -EBUSY is returned the loop restarts with
|
||||
* hmm_vma_walk.last set to an address that has not been stored
|
||||
* in pfns. All entries < last in the pfn array are set to their
|
||||
* output, and all >= are still at their input values.
|
||||
*/
|
||||
} while (ret == -EBUSY);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(hmm_range_fault);
|
||||
|
1
tools/testing/selftests/vm/.gitignore
vendored
1
tools/testing/selftests/vm/.gitignore
vendored
@ -17,3 +17,4 @@ gup_benchmark
|
||||
va_128TBswitch
|
||||
map_fixed_noreplace
|
||||
write_to_hugetlbfs
|
||||
hmm-tests
|
||||
|
@ -7,6 +7,7 @@ CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
|
||||
LDLIBS = -lrt
|
||||
TEST_GEN_FILES = compaction_test
|
||||
TEST_GEN_FILES += gup_benchmark
|
||||
TEST_GEN_FILES += hmm-tests
|
||||
TEST_GEN_FILES += hugepage-mmap
|
||||
TEST_GEN_FILES += hugepage-shm
|
||||
TEST_GEN_FILES += map_hugetlb
|
||||
@ -33,6 +34,8 @@ TEST_FILES := test_vmalloc.sh
|
||||
KSFT_KHDR_INSTALL := 1
|
||||
include ../lib.mk
|
||||
|
||||
$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
|
||||
|
||||
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
|
||||
|
||||
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
|
||||
|
@ -1,3 +1,5 @@
|
||||
CONFIG_SYSVIPC=y
|
||||
CONFIG_USERFAULTFD=y
|
||||
CONFIG_TEST_VMALLOC=m
|
||||
CONFIG_DEVICE_PRIVATE=y
|
||||
CONFIG_TEST_HMM=m
|
||||
|
1359
tools/testing/selftests/vm/hmm-tests.c
Normal file
1359
tools/testing/selftests/vm/hmm-tests.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -307,4 +307,20 @@ else
|
||||
echo "[FAIL]"
|
||||
exitcode=1
|
||||
fi
|
||||
|
||||
echo "running HMM smoke test"
|
||||
echo "------------------------------------"
|
||||
./test_hmm.sh smoke
|
||||
ret_val=$?
|
||||
|
||||
if [ $ret_val -eq 0 ]; then
|
||||
echo "[PASS]"
|
||||
elif [ $ret_val -eq $ksft_skip ]; then
|
||||
echo "[SKIP]"
|
||||
exitcode=$ksft_skip
|
||||
else
|
||||
echo "[FAIL]"
|
||||
exitcode=1
|
||||
fi
|
||||
|
||||
exit $exitcode
|
||||
|
97
tools/testing/selftests/vm/test_hmm.sh
Executable file
97
tools/testing/selftests/vm/test_hmm.sh
Executable file
@ -0,0 +1,97 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
|
||||
#
|
||||
# This is a test script for the kernel HMM test driver (test_hmm).
|
||||
# It is just a kernel module loader: it loads the driver, creates the
|
||||
# /dev/hmm_dmirror device nodes, runs the hmm-tests program and then
|
||||
# unloads the driver again. Pass "smoke" to run the smoke test; run it
|
||||
# without arguments to print the usage message.
|
||||
|
||||
# Base name of this script, used by usage() to print example command lines.
TEST_NAME="test_hmm"
|
||||
# Kernel module that load_driver()/unload_driver() insert and remove.
DRIVER="test_hmm"
|
||||
|
||||
# 1 if fails
# NOTE(review): exitcode is assigned here but never read again in this
# script - confirm whether it was meant to be the final exit status.
|
||||
exitcode=1
|
||||
|
||||
# Kselftest framework requirement - SKIP code is 4.
# Used as the exit status when a prerequisite check fails.
|
||||
ksft_skip=4
|
||||
|
||||
# Verify the prerequisites for running the test. Each failed check prints
# a reason and exits the whole script with the kselftest SKIP code.
check_test_requirements()
{
	local current_uid

	# The script loads a module and creates device nodes, so insist on root.
	current_uid=$(id -u)
	[ "$current_uid" -ne 0 ] && {
		echo "$0: Must be run as root"
		exit $ksft_skip
	}

	# modprobe is needed to load and unload the driver.
	which modprobe > /dev/null 2>&1 || {
		echo "$0: You need modprobe installed"
		exit $ksft_skip
	}

	# modinfo fails when the driver module is not available.
	modinfo $DRIVER > /dev/null 2>&1 || {
		echo "$0: You must have the following enabled in your kernel:"
		echo "CONFIG_TEST_HMM=m"
		exit $ksft_skip
	}
}
|
||||
|
||||
# Load the test driver and create its two character device nodes.
#
# The major number is looked up in /proc/devices under the name
# HMM_DMIRROR. Returns 0 when both nodes were created, non-zero (with a
# message on stderr) when the module could not be loaded, the major number
# could not be found, or a node could not be created.
load_driver()
{
	local major

	if ! modprobe "$DRIVER" > /dev/null 2>&1; then
		echo "$0: failed to load the $DRIVER module" >&2
		return 1
	fi

	major=$(awk '$2 == "HMM_DMIRROR" {print $1}' /proc/devices)
	if [ -z "$major" ]; then
		# Without this check mknod would be called with a missing argument.
		echo "$0: HMM_DMIRROR not found in /proc/devices" >&2
		return 1
	fi

	# mknod refuses to overwrite an existing path, and a node left over
	# from an earlier run may carry a different major number.
	rm -f /dev/hmm_dmirror0 /dev/hmm_dmirror1

	mknod /dev/hmm_dmirror0 c "$major" 0 || return 1
	mknod /dev/hmm_dmirror1 c "$major" 1 || return 1
}
|
||||
|
||||
# Undo load_driver(): remove the module, then delete the device nodes.
unload_driver()
{
	# Output is discarded, so a failed removal is silent.
	modprobe -r $DRIVER &> /dev/null

	# The single-character glob covers hmm_dmirror0 and hmm_dmirror1.
	rm -f /dev/hmm_dmirror?
}
|
||||
|
||||
# Run the smoke test: load the driver, run the hmm-tests program that
# lives next to this script, unload the driver, and finish the script with
# the exit status of hmm-tests.
#
# This function exits instead of returning (as usage() does) so that a
# failing or skipped test run is reported to the caller of the script
# rather than being replaced by a later unconditional success status.
run_smoke()
{
	local status

	echo "Running smoke test. Note, this test provides basic coverage."

	load_driver
	# Quoted so that a script directory containing spaces still works.
	"$(dirname "${BASH_SOURCE[0]}")/hmm-tests"
	status=$?
	# Always clean up, whatever the test result was.
	unload_driver

	exit $status
}
|
||||
|
||||
# Print the usage line followed by example invocations, then exit the
# script with status 0.
usage()
{
	# One printf argument per output line; empty arguments are blank lines.
	printf '%s\n' \
		"Usage: $0" \
		"Example usage:" \
		"" \
		"# Shows help message" \
		"./${TEST_NAME}.sh" \
		"" \
		"# Smoke testing" \
		"./${TEST_NAME}.sh smoke" \
		""
	exit 0
}
|
||||
|
||||
# Dispatch on the first command line argument: "smoke" runs the smoke
# test; anything else, including no argument at all, prints the usage
# message.
run_test()
{
	case "${1:-}" in
	smoke)
		run_smoke
		;;
	*)
		usage
		;;
	esac
}

check_test_requirements
# Quote "$@" so arguments are passed through without word splitting or
# pathname expansion.
run_test "$@"

exit 0
|
Loading…
Reference in New Issue
Block a user