Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "The bulk of the changes here is a largish change to guest_memfd,
  delaying the clearing and encryption of guest-private pages until they
  are actually added to guest page tables. This started as "let's make
  it impossible to misuse the API" for SEV-SNP; but then it ballooned a
  bit.

  The new logic is generally simpler and more ready for hugepage support
  in guest_memfd.

  Summary:

   - fix latent bug in how usage of large pages is determined for
     confidential VMs

   - fix "underline too short" in docs

   - eliminate log spam from limited APIC timer periods

   - disallow pre-faulting of memory before SEV-SNP VMs are initialized

   - delay clearing and encrypting private memory until it is added to
     guest page tables

   - this change also enables another small cleanup: the checks in
     SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can
     now be moved into the common kvm_gmem_populate() function

   - fix compilation error that the RISC-V merge introduced in selftests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: fix determination of max NPT mapping level for private pages
  KVM: riscv: selftests: Fix compile error
  KVM: guest_memfd: abstract how prepared folios are recorded
  KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns
  KVM: extend kvm_range_has_memory_attributes() to check subset of attributes
  KVM: cleanup and add shortcuts to kvm_range_has_memory_attributes()
  KVM: guest_memfd: move check for already-populated page to common code
  KVM: remove kvm_arch_gmem_prepare_needed()
  KVM: guest_memfd: make kvm_gmem_prepare_folio() operate on a single struct kvm
  KVM: guest_memfd: delay kvm_gmem_prepare_folio() until the memory is passed to the guest
  KVM: guest_memfd: return locked folio from __kvm_gmem_get_pfn
  KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_*
  KVM: guest_memfd: do not go through struct page
  KVM: guest_memfd: delay folio_mark_uptodate() until after successful preparation
  KVM: guest_memfd: return folio from __kvm_gmem_get_pfn()
  KVM: x86: disallow pre-fault for SNP VMs before initialization
  KVM: Documentation: Fix title underline too short warning
  KVM: x86: Eliminate log spam from limited APIC timer periods
Merged by Linus Torvalds on 2024-08-02 10:17:49 -07:00 in commit 725d410fac
13 changed files with 214 additions and 159 deletions

Documentation/virt/kvm/api.rst

@@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap).
 See KVM_SET_USER_MEMORY_REGION2 for additional details.
 
 4.143 KVM_PRE_FAULT_MEMORY
-------------------------
+---------------------------
 
 :Capability: KVM_CAP_PRE_FAULT_MEMORY
 :Architectures: none
@@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a
 stage-2 read page fault, e.g. faults in memory as needed, but doesn't break
 CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed.
 
+In the case of confidential VM types where there is an initial set up of
+private guest memory before the guest is 'finalized'/measured, this ioctl
+should only be issued after completing all the necessary setup to put the
+guest into a 'finalized' state so that the above semantics can be reliably
+ensured.
+
 In some cases, multiple vCPUs might share the page tables. In this
 case, the ioctl can be called in parallel.
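
The paragraph added above changes the userspace contract for confidential
guests: pre-faulting must wait until the VM is finalized. As a rough sketch
(not part of this merge; fd and range values are placeholders, and the
retry-on-EINTR behaviour relies on KVM advancing gpa/size in the struct as
documented for partial progress), a VMM would drive the ioctl like this:

	#include <errno.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: pre-fault [gpa, gpa+size) on one vCPU after finalization. */
	static int prefault_range(int vcpu_fd, __u64 gpa, __u64 size)
	{
		struct kvm_pre_fault_memory range;

		memset(&range, 0, sizeof(range));
		range.gpa = gpa;
		range.size = size;

		/*
		 * For SEV-SNP guests this now fails with EOPNOTSUPP until
		 * SNP_LAUNCH_FINISH has set kvm->arch.pre_fault_allowed.
		 * KVM updates gpa/size as it makes progress, so retrying
		 * on EINTR resumes where the previous call stopped.
		 */
		do {
			if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) == 0)
				return 0;
		} while (errno == EINTR && range.size);

		return -1;
	}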

arch/x86/include/asm/kvm_host.h

@@ -1305,6 +1305,7 @@ struct kvm_arch {
 	u8 vm_type;
 	bool has_private_mem;
 	bool has_protected_state;
+	bool pre_fault_allowed;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;

arch/x86/kvm/Kconfig

@@ -141,8 +141,8 @@ config KVM_AMD_SEV
 	depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
 	select ARCH_HAS_CC_PLATFORM
 	select KVM_GENERIC_PRIVATE_MEM
-	select HAVE_KVM_GMEM_PREPARE
-	select HAVE_KVM_GMEM_INVALIDATE
+	select HAVE_KVM_ARCH_GMEM_PREPARE
+	select HAVE_KVM_ARCH_GMEM_INVALIDATE
 	help
 	  Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
 	  with Encrypted State (SEV-ES) on AMD processors.

arch/x86/kvm/lapic.c

@@ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
 	s64 min_period = min_timer_period_us * 1000LL;
 
 	if (apic->lapic_timer.period < min_period) {
-		pr_info_ratelimited(
+		pr_info_once(
 		    "vcpu %i: requested %lld ns "
 		    "lapic timer period limited to %lld ns\n",
 		    apic->vcpu->vcpu_id,

arch/x86/kvm/mmu/mmu.c

@@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
 	if (req_max_level)
 		max_level = min(max_level, req_max_level);
 
-	return req_max_level;
+	return max_level;
 }
 
 static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
@@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 	u64 end;
 	int r;
 
+	if (!vcpu->kvm->arch.pre_fault_allowed)
+		return -EOPNOTSUPP;
+
 	/*
 	 * reload is efficient when called repeatedly, so we can do it on
 	 * every iteration.
@@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
 	const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
 
 	if (level == PG_LEVEL_2M)
-		return kvm_range_has_memory_attributes(kvm, start, end, attrs);
+		return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
 
 	for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
 		if (hugepage_test_mixed(slot, gfn, level - 1) ||

arch/x86/kvm/svm/sev.c

@@ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
 		bool assigned;
 		int level;
 
-		if (!kvm_mem_is_private(kvm, gfn)) {
-			pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
-				 __func__, gfn);
-			ret = -EINVAL;
-			goto err;
-		}
-
 		ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
 		if (ret || assigned) {
 			pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
 				 __func__, gfn, ret, assigned);
-			ret = -EINVAL;
+			ret = ret ? -EINVAL : -EEXIST;
 			goto err;
 		}
@@ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	data->gctx_paddr = __psp_pa(sev->snp_context);
 	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
 
+	/*
+	 * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
+	 * can be given to the guest simply by marking the RMP entry as private.
+	 * This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
+	 */
+	if (!ret)
+		kvm->arch.pre_fault_allowed = true;
+
 	kfree(id_auth);
 
 e_free_id_block:

arch/x86/kvm/svm/svm.c

@@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm)
 		to_kvm_sev_info(kvm)->need_init = true;
 
 		kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
+		kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
 	}
 
 	if (!pause_filter_count || !pause_filter_thresh)

arch/x86/kvm/x86.c

@@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.vm_type = type;
 	kvm->arch.has_private_mem =
 		(type == KVM_X86_SW_PROTECTED_VM);
+	/* Decided by the vendor code for other VM types. */
+	kvm->arch.pre_fault_allowed =
+		type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
 
 	ret = kvm_page_track_init(kvm);
 	if (ret)
@@ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
-{
-	return kvm->arch.vm_type == KVM_X86_SNP_VM;
-}
-
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 {
 	return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);
 }
 #endif
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
 {
 	kvm_x86_call(gmem_invalidate)(start, end);

include/linux/kvm_host.h

@@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
 }
 
 bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
-				     unsigned long attrs);
+				     unsigned long mask, unsigned long attrs);
 bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
 					struct kvm_gfn_range *range);
 bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
@@ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 }
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
 #endif
 
+#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
 /**
  * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
  *
@@ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque);
+#endif
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
 #endif
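
To make the callback contract above concrete, here is a hypothetical
post_populate implementation; this is a sketch only, not code from the
series (sev_gmem_post_populate in the sev.c hunks is the real in-tree
user). After this merge, common code guarantees the callback only sees
private, not-yet-populated pages and reports repopulation as -EEXIST
before the callback ever runs.

	#include <linux/highmem.h>
	#include <linux/kvm_host.h>
	#include <linux/uaccess.h>

	/*
	 * Hypothetical kvm_gmem_populate_cb: copy one page of @src into
	 * the target pfn. @order is whatever kvm_gmem_populate() settled
	 * on after its alignment and attribute checks; a real callback
	 * (e.g. an encrypting one) would honor it.
	 */
	static int demo_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				      void __user *src, int order, void *opaque)
	{
		void *dst = kmap_local_page(pfn_to_page(pfn));
		int ret = 0;

		if (src && copy_from_user(dst, src, PAGE_SIZE))
			ret = -EFAULT;

		kunmap_local(dst);
		return ret;	/* on 0, common code marks the folio prepared */
	}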

tools/testing/selftests/kvm/riscv/get-reg-list.c

@@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
-KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF),
+KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
 KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
 KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
 KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);

virt/kvm/Kconfig

@@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM
 	select KVM_PRIVATE_MEM
 	bool
 
-config HAVE_KVM_GMEM_PREPARE
+config HAVE_KVM_ARCH_GMEM_PREPARE
 	bool
 	depends on KVM_PRIVATE_MEM
 
-config HAVE_KVM_GMEM_INVALIDATE
+config HAVE_KVM_ARCH_GMEM_INVALIDATE
 	bool
 	depends on KVM_PRIVATE_MEM

virt/kvm/guest_memfd.c

@@ -13,84 +13,93 @@ struct kvm_gmem {
 	struct list_head entry;
 };
 
-static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
-{
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
-	struct kvm_gmem *gmem;
-
-	list_for_each_entry(gmem, gmem_list, entry) {
-		struct kvm_memory_slot *slot;
-		struct kvm *kvm = gmem->kvm;
-		struct page *page;
-		kvm_pfn_t pfn;
-		gfn_t gfn;
-		int rc;
-
-		if (!kvm_arch_gmem_prepare_needed(kvm))
-			continue;
-
-		slot = xa_load(&gmem->bindings, index);
-		if (!slot)
-			continue;
-
-		page = folio_file_page(folio, index);
-		pfn = page_to_pfn(page);
-		gfn = slot->base_gfn + index - slot->gmem.pgoff;
-		rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
-		if (rc) {
-			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
-					    index, gfn, pfn, rc);
-			return rc;
-		}
-	}
+/**
+ * folio_file_pfn - like folio_file_page, but return a pfn.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Return: The pfn for this index.
+ */
+static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
+{
+	return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
+}
 
+static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
+				    pgoff_t index, struct folio *folio)
+{
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
+	kvm_pfn_t pfn = folio_file_pfn(folio, index);
+	gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
+	int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));
+	if (rc) {
+		pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
+				    index, gfn, pfn, rc);
+		return rc;
+	}
 #endif
+
 	return 0;
 }
 
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
+static inline void kvm_gmem_mark_prepared(struct folio *folio)
 {
-	struct folio *folio;
+	folio_mark_uptodate(folio);
+}
 
-	/* TODO: Support huge pages. */
-	folio = filemap_grab_folio(inode->i_mapping, index);
-	if (IS_ERR(folio))
-		return folio;
+/*
+ * Process @folio, which contains @gfn, so that the guest can use it.
+ * The folio must be locked and the gfn must be contained in @slot.
+ * On successful return the guest sees a zero page so as to avoid
+ * leaking host data and the up-to-date flag is set.
+ */
+static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
+				  gfn_t gfn, struct folio *folio)
+{
+	unsigned long nr_pages, i;
+	pgoff_t index;
+	int r;
+
+	nr_pages = folio_nr_pages(folio);
+	for (i = 0; i < nr_pages; i++)
+		clear_highpage(folio_page(folio, i));
 
 	/*
-	 * Use the up-to-date flag to track whether or not the memory has been
-	 * zeroed before being handed off to the guest. There is no backing
-	 * storage for the memory, so the folio will remain up-to-date until
-	 * it's removed.
+	 * Preparing huge folios should always be safe, since it should
+	 * be possible to split them later if needed.
 	 *
-	 * TODO: Skip clearing pages when trusted firmware will do it when
-	 * assigning memory to the guest.
+	 * Right now the folio order is always going to be zero, but the
+	 * code is ready for huge folios. The only assumption is that
+	 * the base pgoff of memslots is naturally aligned with the
+	 * requested page order, ensuring that huge folios can also use
+	 * huge page table entries for GPA->HPA mapping.
+	 *
+	 * The order will be passed when creating the guest_memfd, and
+	 * checked when creating memslots.
 	 */
-	if (!folio_test_uptodate(folio)) {
-		unsigned long nr_pages = folio_nr_pages(folio);
-		unsigned long i;
-
-		for (i = 0; i < nr_pages; i++)
-			clear_highpage(folio_page(folio, i));
-
-		folio_mark_uptodate(folio);
-	}
+	WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
+	index = gfn - slot->base_gfn + slot->gmem.pgoff;
+	index = ALIGN_DOWN(index, 1 << folio_order(folio));
+	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
+	if (!r)
+		kvm_gmem_mark_prepared(folio);
 
-	if (prepare) {
-		int r = kvm_gmem_prepare_folio(inode, index, folio);
-		if (r < 0) {
-			folio_unlock(folio);
-			folio_put(folio);
-			return ERR_PTR(r);
-		}
-	}
+	return r;
+}
 
-	/*
-	 * Ignore accessed, referenced, and dirty flags. The memory is
-	 * unevictable and there is no storage to write back to.
-	 */
-	return folio;
+/*
+ * Returns a locked folio on success. The caller is responsible for
+ * setting the up-to-date flag before the memory is mapped into the guest.
+ * There is no backing storage for the memory, so the folio will remain
+ * up-to-date until it's removed.
+ *
+ * Ignore accessed, referenced, and dirty flags. The memory is
+ * unevictable and there is no storage to write back to.
+ */
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+{
+	/* TODO: Support huge pages. */
+	return filemap_grab_folio(inode->i_mapping, index);
 }
 
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		folio = kvm_gmem_get_folio(inode, index, true);
+		folio = kvm_gmem_get_folio(inode, index);
 		if (IS_ERR(folio)) {
 			r = PTR_ERR(folio);
 			break;
@@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
 	return MF_DELAYED;
 }
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 static void kvm_gmem_free_folio(struct folio *folio)
 {
 	struct page *page = folio_page(folio, 0);
@@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = {
 	.dirty_folio = noop_dirty_folio,
 	.migrate_folio = kvm_gmem_migrate_folio,
 	.error_remove_folio = kvm_gmem_error_folio,
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 	.free_folio = kvm_gmem_free_folio,
 #endif
 };
@@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 	fput(file);
 }
 
-static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
-			      gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+/* Returns a locked folio on success.  */
+static struct folio *
+__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
+		   gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
+		   int *max_order)
 {
 	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	struct kvm_gmem *gmem = file->private_data;
 	struct folio *folio;
-	struct page *page;
-	int r;
 
 	if (file != slot->gmem.file) {
 		WARN_ON_ONCE(slot->gmem.file);
-		return -EFAULT;
+		return ERR_PTR(-EFAULT);
 	}
 
 	gmem = file->private_data;
 	if (xa_load(&gmem->bindings, index) != slot) {
 		WARN_ON_ONCE(xa_load(&gmem->bindings, index));
-		return -EIO;
+		return ERR_PTR(-EIO);
 	}
 
-	folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+	folio = kvm_gmem_get_folio(file_inode(file), index);
 	if (IS_ERR(folio))
-		return PTR_ERR(folio);
+		return folio;
 
 	if (folio_test_hwpoison(folio)) {
 		folio_unlock(folio);
 		folio_put(folio);
-		return -EHWPOISON;
+		return ERR_PTR(-EHWPOISON);
 	}
 
-	page = folio_file_page(folio, index);
-
-	*pfn = page_to_pfn(page);
+	*pfn = folio_file_pfn(folio, index);
 	if (max_order)
 		*max_order = 0;
 
-	r = 0;
-
-	folio_unlock(folio);
-
-	return r;
+	*is_prepared = folio_test_uptodate(folio);
+	return folio;
 }
 
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
 {
 	struct file *file = kvm_gmem_get_file(slot);
-	int r;
+	struct folio *folio;
+	bool is_prepared = false;
+	int r = 0;
 
 	if (!file)
 		return -EFAULT;
 
-	r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+	folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order);
+	if (IS_ERR(folio)) {
+		r = PTR_ERR(folio);
+		goto out;
+	}
+
+	if (!is_prepared)
+		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
+
+	folio_unlock(folio);
+	if (r < 0)
+		folio_put(folio);
+
+out:
 	fput(file);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
+#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque)
 {
@@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 	npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
 
 	for (i = 0; i < npages; i += (1 << max_order)) {
+		struct folio *folio;
 		gfn_t gfn = start_gfn + i;
+		bool is_prepared = false;
 		kvm_pfn_t pfn;
 
 		if (signal_pending(current)) {
@@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 			break;
 		}
 
-		ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
-		if (ret)
+		folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order);
+		if (IS_ERR(folio)) {
+			ret = PTR_ERR(folio);
 			break;
+		}
 
-		if (!IS_ALIGNED(gfn, (1 << max_order)) ||
-		    (npages - i) < (1 << max_order))
-			max_order = 0;
+		if (is_prepared) {
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = -EEXIST;
+			break;
+		}
+
+		folio_unlock(folio);
+		WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
+			(npages - i) < (1 << max_order));
+
+		ret = -EINVAL;
+		while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
+							KVM_MEMORY_ATTRIBUTE_PRIVATE,
+							KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
+			if (!max_order)
+				goto put_folio_and_exit;
+			max_order--;
+		}
 
 		p = src ? src + i * PAGE_SIZE : NULL;
 		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
+		if (!ret)
+			kvm_gmem_mark_prepared(folio);
 
-		put_page(pfn_to_page(pfn));
+put_folio_and_exit:
+		folio_put(folio);
 		if (ret)
 			break;
 	}
@@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 	return ret && !i ? ret : i;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_populate);
+#endif
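
The file above is the kernel half of the story; the other half is
userspace creating and binding a guest_memfd in the first place. As a
rough sketch of that pre-existing uAPI, which this merge does not change
(field names per the 6.8-era headers; fds, slot number, and sizes are
placeholders, error handling elided):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: create a guest_memfd and bind it to memslot 0 at GPA 0. */
	static int bind_gmem(int vm_fd, __u64 size)
	{
		struct kvm_create_guest_memfd gmem = { .size = size };
		struct kvm_userspace_memory_region2 region;
		int gmem_fd;

		gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
		if (gmem_fd < 0)
			return -1;

		memset(&region, 0, sizeof(region));
		region.slot = 0;
		region.flags = KVM_MEM_GUEST_MEMFD;
		region.memory_size = size;
		region.guest_memfd = gmem_fd;
		/* guest_phys_addr and userspace_addr stay 0 in this sketch. */

		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
	}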

virt/kvm/kvm_main.c

@@ -2398,42 +2398,6 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
 #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
-/*
- * Returns true if _all_ gfns in the range [@start, @end) have attributes
- * matching @attrs.
- */
-bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
-				     unsigned long attrs)
-{
-	XA_STATE(xas, &kvm->mem_attr_array, start);
-	unsigned long index;
-	bool has_attrs;
-	void *entry;
-
-	rcu_read_lock();
-
-	if (!attrs) {
-		has_attrs = !xas_find(&xas, end - 1);
-		goto out;
-	}
-
-	has_attrs = true;
-	for (index = start; index < end; index++) {
-		do {
-			entry = xas_next(&xas);
-		} while (xas_retry(&xas, entry));
-
-		if (xas.xa_index != index || xa_to_value(entry) != attrs) {
-			has_attrs = false;
-			break;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-	return has_attrs;
-}
-
 static u64 kvm_supported_mem_attributes(struct kvm *kvm)
 {
 	if (!kvm || kvm_arch_has_private_mem(kvm))
@@ -2442,6 +2406,41 @@ static u64 kvm_supported_mem_attributes(struct kvm *kvm)
 	return 0;
 }
 
+/*
+ * Returns true if _all_ gfns in the range [@start, @end) have attributes
+ * such that the bits in @mask match @attrs.
+ */
+bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
+				     unsigned long mask, unsigned long attrs)
+{
+	XA_STATE(xas, &kvm->mem_attr_array, start);
+	unsigned long index;
+	void *entry;
+
+	mask &= kvm_supported_mem_attributes(kvm);
+	if (attrs & ~mask)
+		return false;
+
+	if (end == start + 1)
+		return (kvm_get_memory_attributes(kvm, start) & mask) == attrs;
+
+	guard(rcu)();
+	if (!attrs)
+		return !xas_find(&xas, end - 1);
+
+	for (index = start; index < end; index++) {
+		do {
+			entry = xas_next(&xas);
+		} while (xas_retry(&xas, entry));
+
+		if (xas.xa_index != index ||
+		    (xa_to_value(entry) & mask) != attrs)
+			return false;
+	}
+
+	return true;
+}
+
 static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
 						 struct kvm_mmu_notifier_range *range)
 {
@@ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 	mutex_lock(&kvm->slots_lock);
 
 	/* Nothing to do if the entire range as the desired attributes. */
-	if (kvm_range_has_memory_attributes(kvm, start, end, attributes))
+	if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes))
 		goto out_unlock;
 
 	/*
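
The practical upshot of the new @mask parameter, sketched as a
hypothetical helper: callers can now test just the attribute bits they
care about, while passing ~0 (as kvm_vm_set_mem_attributes() does above)
keeps the old exact-match behaviour.

	#include <linux/kvm_host.h>

	/* Hypothetical helper: true iff every gfn in [start, end) is private. */
	static bool range_is_all_private(struct kvm *kvm, gfn_t start, gfn_t end)
	{
		return kvm_range_has_memory_attributes(kvm, start, end,
						       KVM_MEMORY_ATTRIBUTE_PRIVATE,
						       KVM_MEMORY_ATTRIBUTE_PRIVATE);
	}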