mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 18:08:20 +00:00
* fix latent bug in how usage of large pages is determined for
confidential VMs * fix "underline too short" in docs * eliminate log spam from limited APIC timer periods * disallow pre-faulting of memory before SEV-SNP VMs are initialized * delay clearing and encrypting private memory until it is added to guest page tables * this change also enables another small cleanup: the checks in SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can now be moved in the common kvm_gmem_populate() function * fix compilation error that the RISC-V merge introduced in selftests -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmatCoMUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroMUmgf9FwuSypOyZeZM4DKpNoMdaDQGVBn2 nTUYDJdiggmUNWA5MenqKtZ5N7G79iDO5HNDOUGBXn33f78EimDxsSC+Xfy54RNF SwEUZxQR/y81xOF2LIzfisWkNY+4Bf9fDALUbAlOj/O0E/YHDO9gk+ZNnvdHkWMe 72euiii1xlIV/+Snq7QQZU2UiUNalIfN0wCtPRYG9RGbG+yF2ksm01QU3aE8Q2uu aSN3/DxfiFmKPEP5YQ1qXyntpQ8hA1WfONuUUhmgBgZlSdPS93nyL7y030QDzhgn /JayovN14I3S73rLcepmw3Jx4vTltX1QJA+JqBoKBv/gXJQ8ZCqyLzqrvQ== =ExK2 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm updates from Paolo Bonzini: "The bulk of the changes here is a largish change to guest_memfd, delaying the clearing and encryption of guest-private pages until they are actually added to guest page tables. This started as "let's make it impossible to misuse the API" for SEV-SNP; but then it ballooned a bit. The new logic is generally simpler and more ready for hugepage support in guest_memfd. 
Summary: - fix latent bug in how usage of large pages is determined for confidential VMs - fix "underline too short" in docs - eliminate log spam from limited APIC timer periods - disallow pre-faulting of memory before SEV-SNP VMs are initialized - delay clearing and encrypting private memory until it is added to guest page tables - this change also enables another small cleanup: the checks in SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can now be moved in the common kvm_gmem_populate() function - fix compilation error that the RISC-V merge introduced in selftests" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86/mmu: fix determination of max NPT mapping level for private pages KVM: riscv: selftests: Fix compile error KVM: guest_memfd: abstract how prepared folios are recorded KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns KVM: extend kvm_range_has_memory_attributes() to check subset of attributes KVM: cleanup and add shortcuts to kvm_range_has_memory_attributes() KVM: guest_memfd: move check for already-populated page to common code KVM: remove kvm_arch_gmem_prepare_needed() KVM: guest_memfd: make kvm_gmem_prepare_folio() operate on a single struct kvm KVM: guest_memfd: delay kvm_gmem_prepare_folio() until the memory is passed to the guest KVM: guest_memfd: return locked folio from __kvm_gmem_get_pfn KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_* KVM: guest_memfd: do not go through struct page KVM: guest_memfd: delay folio_mark_uptodate() until after successful preparation KVM: guest_memfd: return folio from __kvm_gmem_get_pfn() KVM: x86: disallow pre-fault for SNP VMs before initialization KVM: Documentation: Fix title underline too short warning KVM: x86: Eliminate log spam from limited APIC timer periods
This commit is contained in:
commit
725d410fac
@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap).
|
||||
See KVM_SET_USER_MEMORY_REGION2 for additional details.
|
||||
|
||||
4.143 KVM_PRE_FAULT_MEMORY
|
||||
------------------------
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_PRE_FAULT_MEMORY
|
||||
:Architectures: none
|
||||
@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a
|
||||
stage-2 read page fault, e.g. faults in memory as needed, but doesn't break
|
||||
CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed.
|
||||
|
||||
In the case of confidential VM types where there is an initial setup of
|
||||
private guest memory before the guest is 'finalized'/measured, this ioctl
|
||||
should only be issued after completing all the necessary setup to put the
|
||||
guest into a 'finalized' state so that the above semantics can be reliably
|
||||
ensured.
|
||||
|
||||
In some cases, multiple vCPUs might share the page tables. In this
|
||||
case, the ioctl can be called in parallel.
|
||||
|
||||
|
@ -1305,6 +1305,7 @@ struct kvm_arch {
|
||||
u8 vm_type;
|
||||
bool has_private_mem;
|
||||
bool has_protected_state;
|
||||
bool pre_fault_allowed;
|
||||
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
|
||||
struct list_head active_mmu_pages;
|
||||
struct list_head zapped_obsolete_pages;
|
||||
|
@ -141,8 +141,8 @@ config KVM_AMD_SEV
|
||||
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
|
||||
select ARCH_HAS_CC_PLATFORM
|
||||
select KVM_GENERIC_PRIVATE_MEM
|
||||
select HAVE_KVM_GMEM_PREPARE
|
||||
select HAVE_KVM_GMEM_INVALIDATE
|
||||
select HAVE_KVM_ARCH_GMEM_PREPARE
|
||||
select HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
help
|
||||
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
|
||||
with Encrypted State (SEV-ES) on AMD processors.
|
||||
|
@ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
|
||||
s64 min_period = min_timer_period_us * 1000LL;
|
||||
|
||||
if (apic->lapic_timer.period < min_period) {
|
||||
pr_info_ratelimited(
|
||||
pr_info_once(
|
||||
"vcpu %i: requested %lld ns "
|
||||
"lapic timer period limited to %lld ns\n",
|
||||
apic->vcpu->vcpu_id,
|
||||
|
@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
|
||||
if (req_max_level)
|
||||
max_level = min(max_level, req_max_level);
|
||||
|
||||
return req_max_level;
|
||||
return max_level;
|
||||
}
|
||||
|
||||
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
|
||||
@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
|
||||
u64 end;
|
||||
int r;
|
||||
|
||||
if (!vcpu->kvm->arch.pre_fault_allowed)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/*
|
||||
* reload is efficient when called repeatedly, so we can do it on
|
||||
* every iteration.
|
||||
@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
|
||||
|
||||
if (level == PG_LEVEL_2M)
|
||||
return kvm_range_has_memory_attributes(kvm, start, end, attrs);
|
||||
return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
|
||||
|
||||
for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
|
||||
if (hugepage_test_mixed(slot, gfn, level - 1) ||
|
||||
|
@ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
|
||||
bool assigned;
|
||||
int level;
|
||||
|
||||
if (!kvm_mem_is_private(kvm, gfn)) {
|
||||
pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
|
||||
__func__, gfn);
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
|
||||
if (ret || assigned) {
|
||||
pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
|
||||
__func__, gfn, ret, assigned);
|
||||
ret = -EINVAL;
|
||||
ret = ret ? -EINVAL : -EEXIST;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
data->gctx_paddr = __psp_pa(sev->snp_context);
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
|
||||
|
||||
/*
|
||||
* Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
|
||||
* can be given to the guest simply by marking the RMP entry as private.
|
||||
* This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
|
||||
*/
|
||||
if (!ret)
|
||||
kvm->arch.pre_fault_allowed = true;
|
||||
|
||||
kfree(id_auth);
|
||||
|
||||
e_free_id_block:
|
||||
|
@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm)
|
||||
to_kvm_sev_info(kvm)->need_init = true;
|
||||
|
||||
kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
|
||||
kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
|
||||
}
|
||||
|
||||
if (!pause_filter_count || !pause_filter_thresh)
|
||||
|
@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
kvm->arch.vm_type = type;
|
||||
kvm->arch.has_private_mem =
|
||||
(type == KVM_X86_SW_PROTECTED_VM);
|
||||
/* Decided by the vendor code for other VM types. */
|
||||
kvm->arch.pre_fault_allowed =
|
||||
type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
|
||||
|
||||
ret = kvm_page_track_init(kvm);
|
||||
if (ret)
|
||||
@ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
|
||||
bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.vm_type == KVM_X86_SNP_VM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
|
||||
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
|
||||
{
|
||||
return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
|
||||
{
|
||||
kvm_x86_call(gmem_invalidate)(start, end);
|
||||
|
@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
|
||||
}
|
||||
|
||||
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
unsigned long attrs);
|
||||
unsigned long mask, unsigned long attrs);
|
||||
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
|
||||
struct kvm_gfn_range *range);
|
||||
bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
|
||||
@ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
|
||||
}
|
||||
#endif /* CONFIG_KVM_PRIVATE_MEM */
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
|
||||
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
|
||||
bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
|
||||
/**
|
||||
* kvm_gmem_populate() - Populate/prepare a GPA range with guest data
|
||||
*
|
||||
@ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
|
||||
|
||||
long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
|
||||
kvm_gmem_populate_cb post_populate, void *opaque);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
|
||||
#endif
|
||||
|
||||
|
@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA),
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB),
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD),
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF),
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
|
||||
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
|
||||
|
@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM
|
||||
select KVM_PRIVATE_MEM
|
||||
bool
|
||||
|
||||
config HAVE_KVM_GMEM_PREPARE
|
||||
config HAVE_KVM_ARCH_GMEM_PREPARE
|
||||
bool
|
||||
depends on KVM_PRIVATE_MEM
|
||||
|
||||
config HAVE_KVM_GMEM_INVALIDATE
|
||||
config HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
bool
|
||||
depends on KVM_PRIVATE_MEM
|
||||
|
@ -13,84 +13,93 @@ struct kvm_gmem {
|
||||
struct list_head entry;
|
||||
};
|
||||
|
||||
static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
|
||||
/**
|
||||
* folio_file_pfn - like folio_file_page, but return a pfn.
|
||||
* @folio: The folio which contains this index.
|
||||
* @index: The index we want to look up.
|
||||
*
|
||||
* Return: The pfn for this index.
|
||||
*/
|
||||
static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
|
||||
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
|
||||
struct kvm_gmem *gmem;
|
||||
return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
|
||||
}
|
||||
|
||||
list_for_each_entry(gmem, gmem_list, entry) {
|
||||
struct kvm_memory_slot *slot;
|
||||
struct kvm *kvm = gmem->kvm;
|
||||
struct page *page;
|
||||
kvm_pfn_t pfn;
|
||||
gfn_t gfn;
|
||||
int rc;
|
||||
|
||||
if (!kvm_arch_gmem_prepare_needed(kvm))
|
||||
continue;
|
||||
|
||||
slot = xa_load(&gmem->bindings, index);
|
||||
if (!slot)
|
||||
continue;
|
||||
|
||||
page = folio_file_page(folio, index);
|
||||
pfn = page_to_pfn(page);
|
||||
gfn = slot->base_gfn + index - slot->gmem.pgoff;
|
||||
rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
|
||||
if (rc) {
|
||||
pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
|
||||
index, gfn, pfn, rc);
|
||||
return rc;
|
||||
}
|
||||
static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
pgoff_t index, struct folio *folio)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
|
||||
kvm_pfn_t pfn = folio_file_pfn(folio, index);
|
||||
gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
|
||||
int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));
|
||||
if (rc) {
|
||||
pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
|
||||
index, gfn, pfn, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
|
||||
static inline void kvm_gmem_mark_prepared(struct folio *folio)
|
||||
{
|
||||
struct folio *folio;
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
|
||||
/* TODO: Support huge pages. */
|
||||
folio = filemap_grab_folio(inode->i_mapping, index);
|
||||
if (IS_ERR(folio))
|
||||
return folio;
|
||||
/*
|
||||
* Process @folio, which contains @gfn, so that the guest can use it.
|
||||
* The folio must be locked and the gfn must be contained in @slot.
|
||||
* On successful return the guest sees a zero page so as to avoid
|
||||
* leaking host data and the up-to-date flag is set.
|
||||
*/
|
||||
static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, struct folio *folio)
|
||||
{
|
||||
unsigned long nr_pages, i;
|
||||
pgoff_t index;
|
||||
int r;
|
||||
|
||||
nr_pages = folio_nr_pages(folio);
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
clear_highpage(folio_page(folio, i));
|
||||
|
||||
/*
|
||||
* Use the up-to-date flag to track whether or not the memory has been
|
||||
* zeroed before being handed off to the guest. There is no backing
|
||||
* storage for the memory, so the folio will remain up-to-date until
|
||||
* it's removed.
|
||||
* Preparing huge folios should always be safe, since it should
|
||||
* be possible to split them later if needed.
|
||||
*
|
||||
* TODO: Skip clearing pages when trusted firmware will do it when
|
||||
* assigning memory to the guest.
|
||||
* Right now the folio order is always going to be zero, but the
|
||||
* code is ready for huge folios. The only assumption is that
|
||||
* the base pgoff of memslots is naturally aligned with the
|
||||
* requested page order, ensuring that huge folios can also use
|
||||
* huge page table entries for GPA->HPA mapping.
|
||||
*
|
||||
* The order will be passed when creating the guest_memfd, and
|
||||
* checked when creating memslots.
|
||||
*/
|
||||
if (!folio_test_uptodate(folio)) {
|
||||
unsigned long nr_pages = folio_nr_pages(folio);
|
||||
unsigned long i;
|
||||
WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
|
||||
index = gfn - slot->base_gfn + slot->gmem.pgoff;
|
||||
index = ALIGN_DOWN(index, 1 << folio_order(folio));
|
||||
r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
|
||||
if (!r)
|
||||
kvm_gmem_mark_prepared(folio);
|
||||
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
clear_highpage(folio_page(folio, i));
|
||||
return r;
|
||||
}
|
||||
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
|
||||
if (prepare) {
|
||||
int r = kvm_gmem_prepare_folio(inode, index, folio);
|
||||
if (r < 0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
return ERR_PTR(r);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore accessed, referenced, and dirty flags. The memory is
|
||||
* unevictable and there is no storage to write back to.
|
||||
*/
|
||||
return folio;
|
||||
/*
|
||||
* Returns a locked folio on success. The caller is responsible for
|
||||
* setting the up-to-date flag before the memory is mapped into the guest.
|
||||
* There is no backing storage for the memory, so the folio will remain
|
||||
* up-to-date until it's removed.
|
||||
*
|
||||
* Ignore accessed, referenced, and dirty flags. The memory is
|
||||
* unevictable and there is no storage to write back to.
|
||||
*/
|
||||
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
|
||||
{
|
||||
/* TODO: Support huge pages. */
|
||||
return filemap_grab_folio(inode->i_mapping, index);
|
||||
}
|
||||
|
||||
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
|
||||
@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
|
||||
break;
|
||||
}
|
||||
|
||||
folio = kvm_gmem_get_folio(inode, index, true);
|
||||
folio = kvm_gmem_get_folio(inode, index);
|
||||
if (IS_ERR(folio)) {
|
||||
r = PTR_ERR(folio);
|
||||
break;
|
||||
@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
|
||||
return MF_DELAYED;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
static void kvm_gmem_free_folio(struct folio *folio)
|
||||
{
|
||||
struct page *page = folio_page(folio, 0);
|
||||
@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = {
|
||||
.dirty_folio = noop_dirty_folio,
|
||||
.migrate_folio = kvm_gmem_migrate_folio,
|
||||
.error_remove_folio = kvm_gmem_error_folio,
|
||||
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
|
||||
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
||||
.free_folio = kvm_gmem_free_folio,
|
||||
#endif
|
||||
};
|
||||
@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
|
||||
fput(file);
|
||||
}
|
||||
|
||||
static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
|
||||
/* Returns a locked folio on success. */
|
||||
static struct folio *
|
||||
__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
|
||||
int *max_order)
|
||||
{
|
||||
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
|
||||
struct kvm_gmem *gmem = file->private_data;
|
||||
struct folio *folio;
|
||||
struct page *page;
|
||||
int r;
|
||||
|
||||
if (file != slot->gmem.file) {
|
||||
WARN_ON_ONCE(slot->gmem.file);
|
||||
return -EFAULT;
|
||||
return ERR_PTR(-EFAULT);
|
||||
}
|
||||
|
||||
gmem = file->private_data;
|
||||
if (xa_load(&gmem->bindings, index) != slot) {
|
||||
WARN_ON_ONCE(xa_load(&gmem->bindings, index));
|
||||
return -EIO;
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
|
||||
folio = kvm_gmem_get_folio(file_inode(file), index);
|
||||
if (IS_ERR(folio))
|
||||
return PTR_ERR(folio);
|
||||
return folio;
|
||||
|
||||
if (folio_test_hwpoison(folio)) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
return -EHWPOISON;
|
||||
return ERR_PTR(-EHWPOISON);
|
||||
}
|
||||
|
||||
page = folio_file_page(folio, index);
|
||||
|
||||
*pfn = page_to_pfn(page);
|
||||
*pfn = folio_file_pfn(folio, index);
|
||||
if (max_order)
|
||||
*max_order = 0;
|
||||
|
||||
r = 0;
|
||||
|
||||
folio_unlock(folio);
|
||||
|
||||
return r;
|
||||
*is_prepared = folio_test_uptodate(folio);
|
||||
return folio;
|
||||
}
|
||||
|
||||
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
|
||||
{
|
||||
struct file *file = kvm_gmem_get_file(slot);
|
||||
int r;
|
||||
struct folio *folio;
|
||||
bool is_prepared = false;
|
||||
int r = 0;
|
||||
|
||||
if (!file)
|
||||
return -EFAULT;
|
||||
|
||||
r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
|
||||
folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order);
|
||||
if (IS_ERR(folio)) {
|
||||
r = PTR_ERR(folio);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!is_prepared)
|
||||
r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
|
||||
|
||||
folio_unlock(folio);
|
||||
if (r < 0)
|
||||
folio_put(folio);
|
||||
|
||||
out:
|
||||
fput(file);
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
|
||||
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
|
||||
kvm_gmem_populate_cb post_populate, void *opaque)
|
||||
{
|
||||
@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
|
||||
|
||||
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
|
||||
for (i = 0; i < npages; i += (1 << max_order)) {
|
||||
struct folio *folio;
|
||||
gfn_t gfn = start_gfn + i;
|
||||
bool is_prepared = false;
|
||||
kvm_pfn_t pfn;
|
||||
|
||||
if (signal_pending(current)) {
|
||||
@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
|
||||
break;
|
||||
}
|
||||
|
||||
ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
|
||||
if (ret)
|
||||
folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order);
|
||||
if (IS_ERR(folio)) {
|
||||
ret = PTR_ERR(folio);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!IS_ALIGNED(gfn, (1 << max_order)) ||
|
||||
(npages - i) < (1 << max_order))
|
||||
max_order = 0;
|
||||
if (is_prepared) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
ret = -EEXIST;
|
||||
break;
|
||||
}
|
||||
|
||||
folio_unlock(folio);
|
||||
WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
|
||||
(npages - i) < (1 << max_order));
|
||||
|
||||
ret = -EINVAL;
|
||||
while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
|
||||
KVM_MEMORY_ATTRIBUTE_PRIVATE,
|
||||
KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
|
||||
if (!max_order)
|
||||
goto put_folio_and_exit;
|
||||
max_order--;
|
||||
}
|
||||
|
||||
p = src ? src + i * PAGE_SIZE : NULL;
|
||||
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
|
||||
if (!ret)
|
||||
kvm_gmem_mark_prepared(folio);
|
||||
|
||||
put_page(pfn_to_page(pfn));
|
||||
put_folio_and_exit:
|
||||
folio_put(folio);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
|
||||
return ret && !i ? ret : i;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_gmem_populate);
|
||||
#endif
|
||||
|
@ -2398,42 +2398,6 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
|
||||
#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
|
||||
|
||||
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
|
||||
/*
|
||||
* Returns true if _all_ gfns in the range [@start, @end) have attributes
|
||||
* matching @attrs.
|
||||
*/
|
||||
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
unsigned long attrs)
|
||||
{
|
||||
XA_STATE(xas, &kvm->mem_attr_array, start);
|
||||
unsigned long index;
|
||||
bool has_attrs;
|
||||
void *entry;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (!attrs) {
|
||||
has_attrs = !xas_find(&xas, end - 1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
has_attrs = true;
|
||||
for (index = start; index < end; index++) {
|
||||
do {
|
||||
entry = xas_next(&xas);
|
||||
} while (xas_retry(&xas, entry));
|
||||
|
||||
if (xas.xa_index != index || xa_to_value(entry) != attrs) {
|
||||
has_attrs = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return has_attrs;
|
||||
}
|
||||
|
||||
static u64 kvm_supported_mem_attributes(struct kvm *kvm)
|
||||
{
|
||||
if (!kvm || kvm_arch_has_private_mem(kvm))
|
||||
@ -2442,6 +2406,41 @@ static u64 kvm_supported_mem_attributes(struct kvm *kvm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if _all_ gfns in the range [@start, @end) have attributes
|
||||
* such that the bits in @mask match @attrs.
|
||||
*/
|
||||
bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
unsigned long mask, unsigned long attrs)
|
||||
{
|
||||
XA_STATE(xas, &kvm->mem_attr_array, start);
|
||||
unsigned long index;
|
||||
void *entry;
|
||||
|
||||
mask &= kvm_supported_mem_attributes(kvm);
|
||||
if (attrs & ~mask)
|
||||
return false;
|
||||
|
||||
if (end == start + 1)
|
||||
return (kvm_get_memory_attributes(kvm, start) & mask) == attrs;
|
||||
|
||||
guard(rcu)();
|
||||
if (!attrs)
|
||||
return !xas_find(&xas, end - 1);
|
||||
|
||||
for (index = start; index < end; index++) {
|
||||
do {
|
||||
entry = xas_next(&xas);
|
||||
} while (xas_retry(&xas, entry));
|
||||
|
||||
if (xas.xa_index != index ||
|
||||
(xa_to_value(entry) & mask) != attrs)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
|
||||
struct kvm_mmu_notifier_range *range)
|
||||
{
|
||||
@ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
/* Nothing to do if the entire range has the desired attributes. */
|
||||
if (kvm_range_has_memory_attributes(kvm, start, end, attributes))
|
||||
if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes))
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user