mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-28 16:53:49 +00:00
KVM: Pin (as in FOLL_PIN) pages during kvm_vcpu_map()
Pin, as in FOLL_PIN, pages when mapping them for direct access by KVM.
As per Documentation/core-api/pin_user_pages.rst, writing to a page that
was gotten via FOLL_GET is explicitly disallowed.

  Correct (uses FOLL_PIN calls):
      pin_user_pages()
      write to the data within the pages
      unpin_user_pages()

  INCORRECT (uses FOLL_GET calls):
      get_user_pages()
      write to the data within the pages
      put_page()

Unfortunately, FOLL_PIN is a "private" flag, and so kvm_follow_pfn must
use a one-off bool instead of being able to piggyback the "flags" field.

Link: https://lwn.net/Articles/930667
Link: https://lore.kernel.org/all/cover.1683044162.git.lstoakes@gmail.com
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-32-seanjc@google.com>
This commit is contained in:
parent
2ff072ba7a
commit
2bcb52a360
@ -280,7 +280,7 @@ struct kvm_host_map {
|
||||
* can be used as guest memory but they are not managed by host
|
||||
* kernel).
|
||||
*/
|
||||
struct page *refcounted_page;
|
||||
struct page *pinned_page;
|
||||
struct page *page;
|
||||
void *hva;
|
||||
kvm_pfn_t pfn;
|
||||
|
@ -2814,9 +2814,12 @@ static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
|
||||
*/
|
||||
if (map) {
|
||||
pfn = map->pfn;
|
||||
page = kvm_pfn_to_refcounted_page(pfn);
|
||||
if (page && !get_page_unless_zero(page))
|
||||
return KVM_PFN_ERR_FAULT;
|
||||
|
||||
if (!kfp->pin) {
|
||||
page = kvm_pfn_to_refcounted_page(pfn);
|
||||
if (page && !get_page_unless_zero(page))
|
||||
return KVM_PFN_ERR_FAULT;
|
||||
}
|
||||
} else {
|
||||
pfn = page_to_pfn(page);
|
||||
}
|
||||
@ -2834,16 +2837,24 @@ static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
|
||||
static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
|
||||
{
|
||||
struct page *page;
|
||||
bool r;
|
||||
|
||||
/*
|
||||
* Fast pin a writable pfn only if it is a write fault request
|
||||
* or the caller allows to map a writable pfn for a read fault
|
||||
* request.
|
||||
* Try the fast-only path when the caller wants to pin/get the page for
|
||||
* writing. If the caller only wants to read the page, KVM must go
|
||||
* down the full, slow path in order to avoid racing an operation that
|
||||
* breaks Copy-on-Write (CoW), e.g. so that KVM doesn't end up pointing
|
||||
* at the old, read-only page while mm/ points at a new, writable page.
|
||||
*/
|
||||
if (!((kfp->flags & FOLL_WRITE) || kfp->map_writable))
|
||||
return false;
|
||||
|
||||
if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page)) {
|
||||
if (kfp->pin)
|
||||
r = pin_user_pages_fast(kfp->hva, 1, FOLL_WRITE, &page) == 1;
|
||||
else
|
||||
r = get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page);
|
||||
|
||||
if (r) {
|
||||
*pfn = kvm_resolve_pfn(kfp, page, NULL, true);
|
||||
return true;
|
||||
}
|
||||
@ -2872,10 +2883,21 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
|
||||
struct page *page, *wpage;
|
||||
int npages;
|
||||
|
||||
npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags);
|
||||
if (kfp->pin)
|
||||
npages = pin_user_pages_unlocked(kfp->hva, 1, &page, flags);
|
||||
else
|
||||
npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags);
|
||||
if (npages != 1)
|
||||
return npages;
|
||||
|
||||
/*
|
||||
* Pinning is mutually exclusive with opportunistically mapping a read
|
||||
* fault as writable, as KVM should never pin pages when mapping memory
|
||||
* into the guest (pinning is only for direct accesses from KVM).
|
||||
*/
|
||||
if (WARN_ON_ONCE(kfp->map_writable && kfp->pin))
|
||||
goto out;
|
||||
|
||||
/* map read fault as writable if possible */
|
||||
if (!(flags & FOLL_WRITE) && kfp->map_writable &&
|
||||
get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
|
||||
@ -2884,6 +2906,7 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
|
||||
flags |= FOLL_WRITE;
|
||||
}
|
||||
|
||||
out:
|
||||
*pfn = kvm_resolve_pfn(kfp, page, NULL, flags & FOLL_WRITE);
|
||||
return npages;
|
||||
}
|
||||
@ -3093,10 +3116,11 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
|
||||
.slot = gfn_to_memslot(vcpu->kvm, gfn),
|
||||
.gfn = gfn,
|
||||
.flags = FOLL_WRITE,
|
||||
.refcounted_page = &map->refcounted_page,
|
||||
.refcounted_page = &map->pinned_page,
|
||||
.pin = true,
|
||||
};
|
||||
|
||||
map->refcounted_page = NULL;
|
||||
map->pinned_page = NULL;
|
||||
map->page = NULL;
|
||||
map->hva = NULL;
|
||||
map->gfn = gfn;
|
||||
@ -3133,16 +3157,16 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
|
||||
if (dirty)
|
||||
kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
|
||||
|
||||
if (map->refcounted_page) {
|
||||
if (map->pinned_page) {
|
||||
if (dirty)
|
||||
kvm_release_page_dirty(map->refcounted_page);
|
||||
else
|
||||
kvm_release_page_clean(map->refcounted_page);
|
||||
kvm_set_page_dirty(map->pinned_page);
|
||||
kvm_set_page_accessed(map->pinned_page);
|
||||
unpin_user_page(map->pinned_page);
|
||||
}
|
||||
|
||||
map->hva = NULL;
|
||||
map->page = NULL;
|
||||
map->refcounted_page = NULL;
|
||||
map->pinned_page = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
|
||||
|
||||
|
@ -30,6 +30,13 @@ struct kvm_follow_pfn {
|
||||
/* FOLL_* flags modifying lookup behavior, e.g. FOLL_WRITE. */
|
||||
unsigned int flags;
|
||||
|
||||
/*
|
||||
* Pin the page (effectively FOLL_PIN, which is an mm/ internal flag).
|
||||
* The page *must* be pinned if KVM will write to the page via a kernel
|
||||
* mapping, e.g. via kmap(), mremap(), etc.
|
||||
*/
|
||||
bool pin;
|
||||
|
||||
/*
|
||||
* If non-NULL, try to get a writable mapping even for a read fault.
|
||||
* Set to true if a writable mapping was obtained.
|
||||
|
Loading…
Reference in New Issue
Block a user