Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
KVM: Provide refcounted page as output field in struct kvm_follow_pfn
Add kvm_follow_pfn.refcounted_page as an output for the "to pfn" APIs to
"return" the struct page that is associated with the returned pfn (if KVM
acquired a reference to the page). This will eventually allow removing
KVM's hacky kvm_pfn_to_refcounted_page() code, which is error prone and
can't detect pfns that are valid, but aren't (currently) refcounted.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-28-seanjc@google.com>
commit 3dd48ecfac (parent 12fac89950)
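For context, here is a minimal caller-side sketch of how the new output field is meant to be consumed. This example is not part of the commit: the hva value and the "map the pfn" step are placeholders, while struct kvm_follow_pfn and hva_to_pfn() appear in the diff below, and is_error_noslot_pfn() and put_page() are existing kernel/KVM APIs.

	struct page *refcounted_page = NULL;
	bool writable = false;
	struct kvm_follow_pfn kfp = {
		.hva = hva,				/* placeholder: host VA to resolve */
		.flags = FOLL_WRITE,			/* request a writable mapping */
		.map_writable = &writable,		/* out: writable mapping obtained? */
		.refcounted_page = &refcounted_page,	/* out: page, iff a ref was taken */
	};
	kvm_pfn_t pfn = hva_to_pfn(&kfp);

	if (!is_error_noslot_pfn(pfn)) {
		/* ... map the pfn into the guest ... */

		/* Drop KVM's reference, but only if one was actually acquired. */
		if (refcounted_page)
			put_page(refcounted_page);
	}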
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2746,6 +2746,46 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w
 	return gfn_to_hva_memslot_prot(slot, gfn, writable);
 }
 
+static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
+				 struct follow_pfnmap_args *map, bool writable)
+{
+	kvm_pfn_t pfn;
+
+	WARN_ON_ONCE(!!page == !!map);
+
+	if (kfp->map_writable)
+		*kfp->map_writable = writable;
+
+	/*
+	 * FIXME: Remove this once KVM no longer blindly calls put_page() on
+	 *	  every pfn that points at a struct page.
+	 *
+	 * Get a reference for follow_pte() pfns if they happen to point at a
+	 * struct page, as KVM will ultimately call kvm_release_pfn_clean() on
+	 * the returned pfn, i.e. KVM expects to have a reference.
+	 *
+	 * Certain IO or PFNMAP mappings can be backed with valid struct pages,
+	 * but be allocated without refcounting, e.g. tail pages of
+	 * non-compound higher order allocations.  Grabbing and putting a
+	 * reference to such pages would cause KVM to prematurely free a page
+	 * it doesn't own (KVM gets and puts the one and only reference).
+	 * Don't allow those pages until the FIXME is resolved.
+	 */
+	if (map) {
+		pfn = map->pfn;
+		page = kvm_pfn_to_refcounted_page(pfn);
+		if (page && !get_page_unless_zero(page))
+			return KVM_PFN_ERR_FAULT;
+	} else {
+		pfn = page_to_pfn(page);
+	}
+
+	if (kfp->refcounted_page)
+		*kfp->refcounted_page = page;
+
+	return pfn;
+}
+
 /*
  * The fast path to get the writable pfn which will be stored in @pfn,
  * true indicates success, otherwise false is returned.
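kvm_resolve_pfn() is invoked in exactly two shapes in the hunks that follow, which is what the WARN_ON_ONCE(!!page == !!map) above enforces: exactly one of @page and @map may be non-NULL. Summarized here for orientation (both lines are taken verbatim from the hunks below):

	/* GUP paths (hva_to_pfn_fast/slow): a refcounted page is already in hand. */
	*pfn = kvm_resolve_pfn(kfp, page, NULL, flags & FOLL_WRITE);

	/* PFNMAP path (hva_to_pfn_remapped): only a raw pfn walk result is in hand. */
	*p_pfn = kvm_resolve_pfn(kfp, NULL, &args, args.writable);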
@@ -2763,9 +2803,7 @@ static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
 		return false;
 
 	if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page)) {
-		*pfn = page_to_pfn(page);
-		if (kfp->map_writable)
-			*kfp->map_writable = true;
+		*pfn = kvm_resolve_pfn(kfp, page, NULL, true);
 		return true;
 	}
 
@@ -2797,23 +2835,15 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
 	if (npages != 1)
 		return npages;
 
-	if (!kfp->map_writable)
-		goto out;
-
-	if (kfp->flags & FOLL_WRITE) {
-		*kfp->map_writable = true;
-		goto out;
-	}
-
 	/* map read fault as writable if possible */
-	if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
-		*kfp->map_writable = true;
+	if (!(flags & FOLL_WRITE) && kfp->map_writable &&
+	    get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
 		put_page(page);
 		page = wpage;
+		flags |= FOLL_WRITE;
 	}
 
-out:
-	*pfn = page_to_pfn(page);
+	*pfn = kvm_resolve_pfn(kfp, page, NULL, flags & FOLL_WRITE);
 	return npages;
 }
 
@@ -2828,22 +2858,11 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 	return true;
 }
 
-static int kvm_try_get_pfn(kvm_pfn_t pfn)
-{
-	struct page *page = kvm_pfn_to_refcounted_page(pfn);
-
-	if (!page)
-		return 1;
-
-	return get_page_unless_zero(page);
-}
-
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       struct kvm_follow_pfn *kfp, kvm_pfn_t *p_pfn)
 {
 	struct follow_pfnmap_args args = { .vma = vma, .address = kfp->hva };
 	bool write_fault = kfp->flags & FOLL_WRITE;
-	kvm_pfn_t pfn;
 	int r;
 
 	r = follow_pfnmap_start(&args);
@@ -2867,37 +2886,13 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	}
 
 	if (write_fault && !args.writable) {
-		pfn = KVM_PFN_ERR_RO_FAULT;
+		*p_pfn = KVM_PFN_ERR_RO_FAULT;
 		goto out;
 	}
 
-	if (kfp->map_writable)
-		*kfp->map_writable = args.writable;
-	pfn = args.pfn;
-
-	/*
-	 * Get a reference here because callers of *hva_to_pfn* and
-	 * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
-	 * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
-	 * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
-	 * simply do nothing for reserved pfns.
-	 *
-	 * Whoever called remap_pfn_range is also going to call e.g.
-	 * unmap_mapping_range before the underlying pages are freed,
-	 * causing a call to our MMU notifier.
-	 *
-	 * Certain IO or PFNMAP mappings can be backed with valid
-	 * struct pages, but be allocated without refcounting e.g.,
-	 * tail pages of non-compound higher order allocations, which
-	 * would then underflow the refcount when the caller does the
-	 * required put_page. Don't allow those pages here.
-	 */
-	if (!kvm_try_get_pfn(pfn))
-		r = -EFAULT;
-
+	*p_pfn = kvm_resolve_pfn(kfp, NULL, &args, args.writable);
 out:
 	follow_pfnmap_end(&args);
-	*p_pfn = pfn;
-
 	return r;
 }
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -35,6 +35,15 @@ struct kvm_follow_pfn {
 	 * Set to true if a writable mapping was obtained.
 	 */
 	bool *map_writable;
+
+	/*
+	 * Optional output.  Set to a valid "struct page" if the returned pfn
+	 * is for a refcounted or pinned struct page, NULL if the returned pfn
+	 * has no struct page or if the struct page is not being refcounted
+	 * (e.g. tail pages of non-compound higher order allocations from
+	 * IO/PFNMAP mappings).
+	 */
+	struct page **refcounted_page;
 };
 
 kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *kfp);