kvm: x86: mmu: allow A/D bits to be disabled in an mmu
Adds the plumbing to disable A/D bits in the MMU based on a new role
bit, ad_disabled. When A/D is disabled, the MMU operates as though A/D
aren't available (i.e., using access tracking faults instead).

To avoid SP -> kvm_mmu_page.role.ad_disabled lookups all over the
place, A/D disablement is now stored in the SPTE. This state is stored
in the SPTE by tweaking the use of SPTE_SPECIAL_MASK for access
tracking. Rather than just setting SPTE_SPECIAL_MASK when an
access-tracking SPTE is non-present, we now always set
SPTE_SPECIAL_MASK for access-tracking SPTEs.

Signed-off-by: Peter Feiner <pfeiner@google.com>
[Use role.ad_disabled even for direct (non-shadow) EPT page tables.
 Add documentation and a few MMU_WARN_ONs. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent dcdca5fed5
commit ac8d57e573
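Before the diff itself, a minimal user-space sketch of the encoding the
commit message describes may help. The bit positions below are
illustrative assumptions (only SPTE_SPECIAL_MASK's role mirrors the
patch): an SPTE of an A/D-disabled MMU always carries the special bit,
and an access-track SPTE is such an SPTE with its RWX bits cleared.

/* Illustrative model only -- bit positions are assumptions, not the
 * kernel's actual values. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SPTE_SPECIAL_MASK     (1ULL << 62)	/* == shadow_acc_track_value */
#define SHADOW_ACC_TRACK_MASK 0x7ULL		/* e.g. the EPT RWX bits */

/* A/D bits are usable iff the special bit is clear. */
static bool spte_ad_enabled(uint64_t spte)
{
	return !(spte & SPTE_SPECIAL_MASK);
}

/* Access-track SPTEs are ad-disabled SPTEs with RWX cleared; a present
 * ad-disabled SPTE keeps its RWX bits and so is not access-track. */
static bool is_access_track_spte(uint64_t spte)
{
	return !spte_ad_enabled(spte) &&
	       (spte & SHADOW_ACC_TRACK_MASK) == 0;
}

int main(void)
{
	uint64_t ad = 0x7;				/* A/D-enabled MMU */
	uint64_t no_ad = SPTE_SPECIAL_MASK | 0x7;	/* A/D disabled, present */
	uint64_t tracked = SPTE_SPECIAL_MASK;		/* marked for tracking */

	printf("ad:      ad_enabled=%d track=%d\n",
	       spte_ad_enabled(ad), is_access_track_spte(ad));
	printf("no_ad:   ad_enabled=%d track=%d\n",
	       spte_ad_enabled(no_ad), is_access_track_spte(no_ad));
	printf("tracked: ad_enabled=%d track=%d\n",
	       spte_ad_enabled(tracked), is_access_track_spte(tracked));
	return 0;
}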
Documentation/virtual/kvm/mmu.txt
@@ -179,6 +179,10 @@ Shadow pages contain the following information:
     shadow page; it is also used to go back from a struct kvm_mmu_page
     to a memslot, through the kvm_memslots_for_spte_role macro and
     __gfn_to_memslot.
+  role.ad_disabled:
+    Is 1 if the MMU instance cannot use A/D bits.  EPT did not have A/D
+    bits before Haswell; shadow EPT page tables also cannot use A/D bits
+    if the L1 hypervisor does not enable them.
   gfn:
     Either the guest page table containing the translations shadowed by this
     page, or the base page frame for linear translations. See role.direct.
arch/x86/include/asm/kvm_host.h
@@ -257,7 +257,8 @@ union kvm_mmu_page_role {
 		unsigned cr0_wp:1;
 		unsigned smep_andnot_wp:1;
 		unsigned smap_andnot_wp:1;
-		unsigned :8;
+		unsigned ad_disabled:1;
+		unsigned :7;
 
 		/*
 		 * This is left at the top of the word so that
arch/x86/kvm/mmu.c
@@ -187,10 +187,9 @@ static u64 __read_mostly shadow_mmio_value;
 static u64 __read_mostly shadow_present_mask;
 
 /*
- * The mask/value to distinguish a PTE that has been marked not-present for
- * access tracking purposes.
- * The mask would be either 0 if access tracking is disabled, or
- * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled.
+ * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
+ * Non-present SPTEs with shadow_acc_track_value set are in place for access
+ * tracking.
  */
 static u64 __read_mostly shadow_acc_track_mask;
 static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
@@ -216,10 +215,32 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
+static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
+{
+	return sp->role.ad_disabled;
+}
+
+static inline bool spte_ad_enabled(u64 spte)
+{
+	MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+	return !(spte & shadow_acc_track_value);
+}
+
+static inline u64 spte_shadow_accessed_mask(u64 spte)
+{
+	MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+	return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
+}
+
+static inline u64 spte_shadow_dirty_mask(u64 spte)
+{
+	MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+	return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
+}
+
 static inline bool is_access_track_spte(u64 spte)
 {
-	/* Always false if shadow_acc_track_mask is zero. */
-	return (spte & shadow_acc_track_mask) == shadow_acc_track_value;
+	return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
 }
 
 /*
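A note on the MMU_WARN_ONs above: MMIO SPTEs also carry
SPTE_SPECIAL_MASK (the parent commit folds it into shadow_mmio_mask and
shadow_mmio_value), so the special bit alone cannot tell an MMIO SPTE
from an ad-disabled one. The helpers therefore assert that they are
never handed an MMIO SPTE. A hedged sketch of that invariant, with
assumed mask values:

/* Sketch of the "not an MMIO SPTE" precondition -- mask values are
 * assumptions; the real ones are configured at init time. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SPTE_SPECIAL_MASK (1ULL << 62)
#define SHADOW_MMIO_MASK  (SPTE_SPECIAL_MASK | 0x7ULL)	/* assumption */
#define SHADOW_MMIO_VALUE (SPTE_SPECIAL_MASK | 0x6ULL)	/* assumption */

static bool is_mmio_spte(uint64_t spte)
{
	return (spte & SHADOW_MMIO_MASK) == SHADOW_MMIO_VALUE;
}

static bool spte_ad_enabled(uint64_t spte)
{
	/* Stands in for MMU_WARN_ON: an MMIO SPTE has the special bit
	 * set for a different reason, so asking about its A/D state
	 * would silently return "disabled". */
	assert(!is_mmio_spte(spte));
	return !(spte & SPTE_SPECIAL_MASK);
}

int main(void)
{
	assert(spte_ad_enabled(0x7ULL));			/* plain SPTE */
	assert(!spte_ad_enabled(SPTE_SPECIAL_MASK | 0x1ULL));	/* ad-disabled */
	return 0;
}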
@@ -329,10 +350,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
 		u64 acc_track_mask)
 {
-	if (acc_track_mask != 0)
-		acc_track_mask |= SPTE_SPECIAL_MASK;
 	BUG_ON(!dirty_mask != !accessed_mask);
 	BUG_ON(!accessed_mask && !acc_track_mask);
+	BUG_ON(acc_track_mask & shadow_acc_track_value);
 
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
@@ -341,7 +361,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 	shadow_x_mask = x_mask;
 	shadow_present_mask = p_mask;
 	shadow_acc_track_mask = acc_track_mask;
-	WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -561,7 +580,7 @@ static bool spte_has_volatile_bits(u64 spte)
 	    is_access_track_spte(spte))
 		return true;
 
-	if (shadow_accessed_mask) {
+	if (spte_ad_enabled(spte)) {
 		if ((spte & shadow_accessed_mask) == 0 ||
 		    (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
 			return true;
@@ -572,14 +591,17 @@ static bool spte_has_volatile_bits(u64 spte)
 
 static bool is_accessed_spte(u64 spte)
 {
-	return shadow_accessed_mask ? spte & shadow_accessed_mask
-				    : !is_access_track_spte(spte);
+	u64 accessed_mask = spte_shadow_accessed_mask(spte);
+
+	return accessed_mask ? spte & accessed_mask
+			     : !is_access_track_spte(spte);
 }
 
 static bool is_dirty_spte(u64 spte)
 {
-	return shadow_dirty_mask ? spte & shadow_dirty_mask
-				 : spte & PT_WRITABLE_MASK;
+	u64 dirty_mask = spte_shadow_dirty_mask(spte);
+
+	return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
 }
 
 /* Rules for using mmu_spte_set:
@@ -719,10 +741,10 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
 
 static u64 mark_spte_for_access_track(u64 spte)
 {
-	if (shadow_accessed_mask != 0)
+	if (spte_ad_enabled(spte))
 		return spte & ~shadow_accessed_mask;
 
-	if (shadow_acc_track_mask == 0 || is_access_track_spte(spte))
+	if (is_access_track_spte(spte))
 		return spte;
 
 	/*
@@ -741,7 +763,6 @@ static u64 mark_spte_for_access_track(u64 spte)
 	spte |= (spte & shadow_acc_track_saved_bits_mask) <<
 		shadow_acc_track_saved_bits_shift;
 	spte &= ~shadow_acc_track_mask;
-	spte |= shadow_acc_track_value;
 
 	return spte;
 }
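The two hunks above can drop the explicit spte |= shadow_acc_track_value
because, with this patch, the special bit is already set when an
ad-disabled SPTE is created rather than when it is marked for access
tracking. A toy round-trip of the save/clear/restore transform, under
assumed bit positions (RWX in bits 0-2, saved copies parked at bits
52-54):

/* Toy model of mark/restore for access tracking; bit layout is an
 * assumption for illustration, not the kernel's authoritative values. */
#include <assert.h>
#include <stdint.h>

#define SPTE_SPECIAL_MASK (1ULL << 62)
#define ACC_TRACK_MASK    0x7ULL	/* bits cleared while tracking */
#define SAVED_BITS_MASK   0x7ULL	/* bits preserved for restore */
#define SAVED_BITS_SHIFT  52

static uint64_t mark_spte_for_access_track(uint64_t spte)
{
	/* Park the RWX bits in the saved-bits field, then clear them so
	 * the next access faults.  The special bit is already set on
	 * ad-disabled SPTEs, so nothing needs to be ORed in here. */
	spte |= (spte & SAVED_BITS_MASK) << SAVED_BITS_SHIFT;
	spte &= ~ACC_TRACK_MASK;
	return spte;
}

static uint64_t restore_acc_track_spte(uint64_t spte)
{
	uint64_t saved = (spte >> SAVED_BITS_SHIFT) & SAVED_BITS_MASK;

	spte &= ~(SAVED_BITS_MASK << SAVED_BITS_SHIFT);
	return spte | saved;
}

int main(void)
{
	uint64_t spte = SPTE_SPECIAL_MASK | 0x5ULL;	/* R+X, ad-disabled */

	assert(restore_acc_track_spte(mark_spte_for_access_track(spte)) == spte);
	return 0;
}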
@@ -753,6 +774,7 @@ static u64 restore_acc_track_spte(u64 spte)
 	u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
 			 & shadow_acc_track_saved_bits_mask;
 
+	WARN_ON_ONCE(spte_ad_enabled(spte));
 	WARN_ON_ONCE(!is_access_track_spte(spte));
 
 	new_spte &= ~shadow_acc_track_mask;
@@ -771,7 +793,7 @@ static bool mmu_spte_age(u64 *sptep)
 	if (!is_accessed_spte(spte))
 		return false;
 
-	if (shadow_accessed_mask) {
+	if (spte_ad_enabled(spte)) {
 		clear_bit((ffs(shadow_accessed_mask) - 1),
 			  (unsigned long *)sptep);
 	} else {
@@ -1402,6 +1424,22 @@ static bool spte_clear_dirty(u64 *sptep)
 	return mmu_spte_update(sptep, spte);
 }
 
+static bool wrprot_ad_disabled_spte(u64 *sptep)
+{
+	bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
+					       (unsigned long *)sptep);
+	if (was_writable)
+		kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+
+	return was_writable;
+}
+
+/*
+ * Gets the GFN ready for another round of dirty logging by clearing the
+ *	- D bit on ad-enabled SPTEs, and
+ *	- W bit on ad-disabled SPTEs.
+ * Returns true iff any D or W bits were cleared.
+ */
 static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 {
 	u64 *sptep;
@@ -1409,7 +1447,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 	bool flush = false;
 
 	for_each_rmap_spte(rmap_head, &iter, sptep)
-		flush |= spte_clear_dirty(sptep);
+		if (spte_ad_enabled(*sptep))
+			flush |= spte_clear_dirty(sptep);
+		else
+			flush |= wrprot_ad_disabled_spte(sptep);
 
 	return flush;
 }
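This dispatch is the heart of dirty logging without A/D bits: instead of
clearing a D bit, the MMU revokes write access so that the next write
faults and reports the page dirty. A compressed model of the two paths,
with assumed bit positions:

/* Illustrative model of the two dirty-logging strategies; the real
 * bit positions come from the architecture-specific masks. */
#include <stdbool.h>
#include <stdint.h>

#define PT_WRITABLE_MASK  (1ULL << 1)
#define SHADOW_DIRTY_MASK (1ULL << 9)	/* assumption */
#define SPTE_SPECIAL_MASK (1ULL << 62)

static bool spte_ad_enabled(uint64_t spte)
{
	return !(spte & SPTE_SPECIAL_MASK);
}

/* Returns true if a TLB flush is needed, mirroring __rmap_clear_dirty. */
static bool clear_dirty_for_logging(uint64_t *sptep)
{
	bool flush;

	if (spte_ad_enabled(*sptep)) {
		/* A/D path: clear D, leave the SPTE writable. */
		flush = *sptep & SHADOW_DIRTY_MASK;
		*sptep &= ~SHADOW_DIRTY_MASK;
	} else {
		/* No A/D: write-protect, so the next write faults and
		 * the fault handler marks the pfn dirty. */
		flush = *sptep & PT_WRITABLE_MASK;
		*sptep &= ~PT_WRITABLE_MASK;
	}
	return flush;
}

int main(void)
{
	uint64_t ad_spte = SHADOW_DIRTY_MASK | PT_WRITABLE_MASK;
	uint64_t noad_spte = SPTE_SPECIAL_MASK | PT_WRITABLE_MASK;

	return !(clear_dirty_for_logging(&ad_spte) &&	/* clears D */
		 clear_dirty_for_logging(&noad_spte));	/* clears W */
}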
@@ -1432,7 +1473,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 	bool flush = false;
 
 	for_each_rmap_spte(rmap_head, &iter, sptep)
-		flush |= spte_set_dirty(sptep);
+		if (spte_ad_enabled(*sptep))
+			flush |= spte_set_dirty(sptep);
 
 	return flush;
 }
@@ -1464,7 +1506,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 }
 
 /**
- * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
+ * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
+ * protect the page if the D-bit isn't supported.
  * @kvm: kvm instance
  * @slot: slot to clear D-bit
  * @gfn_offset: start of the BITS_PER_LONG pages we care about
@@ -2389,7 +2432,12 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
 	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
 	spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
-	       shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+	       shadow_user_mask | shadow_x_mask;
+
+	if (sp_ad_disabled(sp))
+		spte |= shadow_acc_track_value;
+	else
+		spte |= shadow_accessed_mask;
 
 	mmu_spte_set(sptep, spte);
 
@@ -2657,10 +2705,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 {
 	u64 spte = 0;
 	int ret = 0;
+	struct kvm_mmu_page *sp;
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
+	sp = page_header(__pa(sptep));
+	if (sp_ad_disabled(sp))
+		spte |= shadow_acc_track_value;
+
 	/*
 	 * For the EPT case, shadow_present_mask is 0 if hardware
 	 * supports exec-only page table entries.  In that case,
@@ -2669,7 +2722,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	 */
 	spte |= shadow_present_mask;
 	if (!speculative)
-		spte |= shadow_accessed_mask;
+		spte |= spte_shadow_accessed_mask(spte);
 
 	if (pte_access & ACC_EXEC_MASK)
 		spte |= shadow_x_mask;
@@ -2726,7 +2779,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	if (pte_access & ACC_WRITE_MASK) {
 		kvm_vcpu_mark_page_dirty(vcpu, gfn);
-		spte |= shadow_dirty_mask;
+		spte |= spte_shadow_dirty_mask(spte);
 	}
 
 	if (speculative)
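One subtlety in set_spte above: shadow_acc_track_value must be ORed into
the new SPTE before spte_shadow_accessed_mask(spte) and
spte_shadow_dirty_mask(spte) are consulted, because those helpers read
the very bit that was just set to decide whether to return a mask at
all. A small sketch of the ordering dependency (illustrative bit
values):

/* Why the order of ORs in set_spte matters -- illustrative model. */
#include <assert.h>
#include <stdint.h>

#define SPTE_SPECIAL_MASK    (1ULL << 62)
#define SHADOW_ACCESSED_MASK (1ULL << 8)	/* assumption */

static uint64_t spte_shadow_accessed_mask(uint64_t spte)
{
	return (spte & SPTE_SPECIAL_MASK) ? 0 : SHADOW_ACCESSED_MASK;
}

static uint64_t make_spte(int ad_disabled)
{
	uint64_t spte = 0;

	if (ad_disabled)
		spte |= SPTE_SPECIAL_MASK;	/* must come first */

	/* Evaluates to 0 for ad-disabled SPTEs because the special bit
	 * is already in place; swapping these two steps would wrongly
	 * set the accessed bit. */
	spte |= spte_shadow_accessed_mask(spte);
	return spte;
}

int main(void)
{
	assert(make_spte(0) & SHADOW_ACCESSED_MASK);
	assert(!(make_spte(1) & SHADOW_ACCESSED_MASK));
	return 0;
}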
@@ -2868,16 +2921,16 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 {
 	struct kvm_mmu_page *sp;
 
+	sp = page_header(__pa(sptep));
+
 	/*
-	 * Since it's no accessed bit on EPT, it's no way to
-	 * distinguish between actually accessed translations
-	 * and prefetched, so disable pte prefetch if EPT is
-	 * enabled.
+	 * Without accessed bits, there's no way to distinguish between
+	 * actually accessed translations and prefetched, so disable pte
+	 * prefetch if accessed bits aren't available.
 	 */
-	if (!shadow_accessed_mask)
+	if (sp_ad_disabled(sp))
 		return;
 
-	sp = page_header(__pa(sptep));
 	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
 		return;
@@ -4278,6 +4331,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	context->base_role.word = 0;
 	context->base_role.smm = is_smm(vcpu);
+	context->base_role.ad_disabled = (shadow_accessed_mask == 0);
 	context->page_fault = tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
@@ -4624,6 +4678,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	mask.smep_andnot_wp = 1;
 	mask.smap_andnot_wp = 1;
 	mask.smm = 1;
+	mask.ad_disabled = 1;
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
arch/x86/kvm/mmutrace.h
@@ -30,8 +30,9 @@
 									\
 	role.word = __entry->role;					\
 									\
-	trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s"	\
-			 " %snxe root %u %s%c", __entry->mmu_valid_gen,	\
+	trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s"	\
+			 " %snxe %sad root %u %s%c",			\
+			 __entry->mmu_valid_gen,			\
 			 __entry->gfn, role.level,			\
 			 role.cr4_pae ? " pae" : "",			\
 			 role.quadrant,					\
@@ -39,6 +40,7 @@
 			 access_str[role.access],			\
 			 role.invalid ? " invalid" : "",		\
 			 role.nxe ? "" : "!",				\
+			 role.ad_disabled ? "!" : "",			\
 			 __entry->root_count,				\
 			 __entry->unsync ? "unsync" : "sync", 0);	\
 	saved_ptr;							\