From 9b132fbe5419d789f1ef396bed5eb66a365dd1e9 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Tue, 4 Dec 2012 10:35:13 +0800 Subject: [PATCH 1/4] Add rcu user eqs exception hooks for async page fault This patch adds user eqs exception hooks for async page fault page not present code path, to exit the user eqs and re-enter it as necessary. Async page fault is different from other exceptions that it may be triggered from idle process, so we still need rcu_irq_enter() and rcu_irq_exit() to exit cpu idle eqs when needed, to protect the code that needs use rcu. As Frederic pointed out it would be safest and simplest to protect the whole kvm_async_pf_task_wait(). Otherwise, "we need to check all the code there deeply for potential RCU uses and ensure it will never be extended later to use RCU.". However, We'd better re-enter the cpu idle eqs if we get the exception in cpu idle eqs, by calling rcu_irq_exit() before native_safe_halt(). So the patch does what Frederic suggested for rcu_irq_*() API usage here, except that I moved the rcu_irq_*() pair originally in do_async_page_fault() into kvm_async_pf_task_wait(). That's because, I think it's better to have rcu_irq_*() pairs to be in one function ( rcu_irq_exit() after rcu_irq_enter() ), especially here, kvm_async_pf_task_wait() has other callers, which might cause rcu_irq_exit() be called without a matching rcu_irq_enter() before it, which is illegal if the cpu happens to be in rcu idle state. Signed-off-by: Li Zhong Signed-off-by: Gleb Natapov --- arch/x86/kernel/kvm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 08b973f64032..9c2bd8bd4b4c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -43,6 +43,7 @@ #include #include #include +#include static int kvmapf = 1; @@ -121,6 +122,8 @@ void kvm_async_pf_task_wait(u32 token) struct kvm_task_sleep_node n, *e; DEFINE_WAIT(wait); + rcu_irq_enter(); + spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { @@ -128,6 +131,8 @@ void kvm_async_pf_task_wait(u32 token) hlist_del(&e->link); kfree(e); spin_unlock(&b->lock); + + rcu_irq_exit(); return; } @@ -152,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token) /* * We cannot reschedule. So halt. */ + rcu_irq_exit(); native_safe_halt(); + rcu_irq_enter(); local_irq_disable(); } } if (!n.halted) finish_wait(&n.wq, &wait); + rcu_irq_exit(); return; } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); @@ -252,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ - rcu_irq_enter(); + exception_enter(regs); exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); - rcu_irq_exit(); + exception_exit(regs); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); From 1509ae56cdc19a12f76000e31602e4119fffc571 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 18 Dec 2012 01:21:28 +0000 Subject: [PATCH 2/4] powerpc: Corrected include header path in kvm_para.h The include/uapi/asm/kvm_para.h includes but the correct reference should be as this is the place where make install_header installs the header files for userspace. Signed-off-by: Bharat Bhushan CC: stable@vger.kernel.org Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/kvm_para.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index ed0e0254b47f..e3af3286a068 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -78,7 +78,7 @@ struct kvm_vcpu_arch_shared { #define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num) -#include +#include #define KVM_FEATURE_MAGIC_PAGE 1 From d591390da94234d5b8eb1bbd9f2a25d2e780d528 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 22 Dec 2012 04:09:17 +0000 Subject: [PATCH 3/4] KVM: PPC: Book3S HV: Fix compilation without CONFIG_PPC_POWERNV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes this build breakage: arch/powerpc/kvm/book3s_hv_ras.c: In function ‘kvmppc_realmode_mc_power7’: arch/powerpc/kvm/book3s_hv_ras.c:126:23: error: ‘struct paca_struct’ has no member named ‘opal_mc_evt’ Signed-off-by: Andreas Schwab Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_ras.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 35f3cf0269b3..a353c485808c 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -79,7 +79,9 @@ static void flush_tlb_power7(struct kvm_vcpu *vcpu) static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; +#ifdef CONFIG_PPC_POWERNV struct opal_machine_check_event *opal_evt; +#endif long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -117,6 +119,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } +#ifdef CONFIG_PPC_POWERNV /* * See if OPAL has already handled the condition. * We assume that if the condition is recovered then OPAL @@ -131,6 +134,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) if (handled) opal_evt->in_use = 0; +#endif return handled; } From 013f6a5d3dd9e4ebf4b49ca427b9c1f2e2a1b767 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 3 Jan 2013 11:41:39 -0200 Subject: [PATCH 4/4] KVM: x86: use dynamic percpu allocations for shared msrs area Use dynamic percpu allocations for the shared msrs structure, to avoid using the limited reserved percpu space. Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76f54461f7cb..c243b81e3c74 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -120,7 +120,7 @@ struct kvm_shared_msrs { }; static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; -static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); +static struct kvm_shared_msrs __percpu *shared_msrs; struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, @@ -191,10 +191,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn) static void shared_msr_update(unsigned slot, u32 msr) { - struct kvm_shared_msrs *smsr; u64 value; + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); - smsr = &__get_cpu_var(shared_msrs); /* only read, and nobody should modify it at this time, * so don't need lock */ if (slot >= shared_msrs_global.nr) { @@ -226,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void) void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) { - struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); if (((value ^ smsr->values[slot].curr) & mask) == 0) return; @@ -242,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr); static void drop_user_return_notifiers(void *ignore) { - struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); if (smsr->registered) kvm_on_user_return(&smsr->urn); @@ -5233,9 +5235,16 @@ int kvm_arch_init(void *opaque) goto out; } + r = -ENOMEM; + shared_msrs = alloc_percpu(struct kvm_shared_msrs); + if (!shared_msrs) { + printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); + goto out; + } + r = kvm_mmu_module_init(); if (r) - goto out; + goto out_free_percpu; kvm_set_mmio_spte_mask(); kvm_init_msr_list(); @@ -5258,6 +5267,8 @@ int kvm_arch_init(void *opaque) return 0; +out_free_percpu: + free_percpu(shared_msrs); out: return r; } @@ -5275,6 +5286,7 @@ void kvm_arch_exit(void) #endif kvm_x86_ops = NULL; kvm_mmu_module_exit(); + free_percpu(shared_msrs); } int kvm_emulate_halt(struct kvm_vcpu *vcpu)