xen: fixes and features for 4.12

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABAgAGBQJZChTBAAoJELDendYovxMvkXEIAJDpK5UKMsL1Ihgc0DL0OujQ
 UGxLfWJueSA1X7i8BgL/8vfgKxSEB9SUiM+ooHOKXS6oDhyk2RP4MuCe5+lhUbbv
 ZMK5KxHMlVUOD9EjYif8DhhiwRowBbWYEwr8XgY12s0Ya0a9TQLVC+noGsuzqNiH
 1UyzeeWlBae4nulUMMim6urPNq5AEPVeQKNX3S8rlnDp74IKVZuoISMM62b2KRSr
 +R8FVBshXR/HO53YNY0+AfmmUa8T1+dyjL50Eo/QnsG0i+3igOqNrzSKSc6T+nBt
 Zl3KDUE5W3/OlxuR+CIdZZ1KKtjzoAiR3cvVlHs2z7MIio87bJcYJforAqe6Evo=
 =k6in
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:
 "Xen fixes and features for 4.12. The main changes are:

   - enable building the kernel with Xen support but without enabling
     paravirtualized mode (Vitaly Kuznetsov)

   - add a new 9pfs xen frontend driver (Stefano Stabellini)

   - simplify Xen's cpuid handling by making use of cpu capabilities
     (Juergen Gross)

   - add/modify some headers for new Xen paravirtualized devices
     (Oleksandr Andrushchenko)

   - EFI reset_system support under Xen (Julien Grall)

   - and the usual cleanups and corrections"

* tag 'for-linus-4.12b-rc0b-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (57 commits)
  xen: Move xen_have_vector_callback definition to enlighten.c
  xen: Implement EFI reset_system callback
  arm/xen: Consolidate calls to shutdown hypercall in a single helper
  xen: Export xen_reboot
  xen/x86: Call xen_smp_intr_init_pv() on BSP
  xen: Revert commits da72ff5bfc and 72a9b18629
  xen/pvh: Do not fill kernel's e820 map in init_pvh_bootparams()
  xen/scsifront: use offset_in_page() macro
  xen/arm,arm64: rename __generic_dma_ops to xen_get_dma_ops
  xen/arm,arm64: fix xen_dma_ops after 815dd18 "Consolidate get_dma_ops..."
  xen/9pfs: select CONFIG_XEN_XENBUS_FRONTEND
  x86/cpu: remove hypervisor specific set_cpu_features
  vmware: set cpu capabilities during platform initialization
  x86/xen: use capabilities instead of fake cpuid values for xsave
  x86/xen: use capabilities instead of fake cpuid values for x2apic
  x86/xen: use capabilities instead of fake cpuid values for mwait
  x86/xen: use capabilities instead of fake cpuid values for acpi
  x86/xen: use capabilities instead of fake cpuid values for acc
  x86/xen: use capabilities instead of fake cpuid values for mtrr
  x86/xen: use capabilities instead of fake cpuid values for aperf
  ...
This commit is contained in:
Linus Torvalds 2017-05-04 11:37:09 -07:00
commit a96480723c
53 changed files with 8412 additions and 5303 deletions

View File

@ -15,6 +15,9 @@ struct dev_archdata {
#endif #endif
#ifdef CONFIG_ARM_DMA_USE_IOMMU #ifdef CONFIG_ARM_DMA_USE_IOMMU
struct dma_iommu_mapping *mapping; struct dma_iommu_mapping *mapping;
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif #endif
bool dma_coherent; bool dma_coherent;
}; };

View File

@ -16,19 +16,9 @@
extern const struct dma_map_ops arm_dma_ops; extern const struct dma_map_ops arm_dma_ops;
extern const struct dma_map_ops arm_coherent_dma_ops; extern const struct dma_map_ops arm_coherent_dma_ops;
static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
{
if (dev && dev->dma_ops)
return dev->dma_ops;
return &arm_dma_ops;
}
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{ {
if (xen_initial_domain()) return &arm_dma_ops;
return xen_dma_ops;
else
return __generic_dma_ops(NULL);
} }
#define HAVE_ARCH_DMA_SUPPORTED 1 #define HAVE_ARCH_DMA_SUPPORTED 1

View File

@ -2414,6 +2414,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dma_ops = arm_get_dma_map_ops(coherent); dma_ops = arm_get_dma_map_ops(coherent);
set_dma_ops(dev, dma_ops); set_dma_ops(dev, dma_ops);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
dev->dma_ops = xen_dma_ops;
}
#endif
} }
void arch_teardown_dma_ops(struct device *dev) void arch_teardown_dma_ops(struct device *dev)

View File

@ -35,6 +35,6 @@ void __init xen_efi_runtime_setup(void)
efi.update_capsule = xen_efi_update_capsule; efi.update_capsule = xen_efi_update_capsule;
efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.query_capsule_caps = xen_efi_query_capsule_caps;
efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
efi.reset_system = NULL; /* Functionality provided by Xen. */ efi.reset_system = xen_efi_reset_system;
} }
EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); EXPORT_SYMBOL_GPL(xen_efi_runtime_setup);

View File

@ -191,20 +191,24 @@ static int xen_dying_cpu(unsigned int cpu)
return 0; return 0;
} }
static void xen_restart(enum reboot_mode reboot_mode, const char *cmd) void xen_reboot(int reason)
{ {
struct sched_shutdown r = { .reason = SHUTDOWN_reboot }; struct sched_shutdown r = { .reason = reason };
int rc; int rc;
rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
BUG_ON(rc); BUG_ON(rc);
} }
static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
{
xen_reboot(SHUTDOWN_reboot);
}
static void xen_power_off(void) static void xen_power_off(void)
{ {
struct sched_shutdown r = { .reason = SHUTDOWN_poweroff }; xen_reboot(SHUTDOWN_poweroff);
int rc;
rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
BUG_ON(rc);
} }
static irqreturn_t xen_arm_callback(int irq, void *arg) static irqreturn_t xen_arm_callback(int irq, void *arg)

View File

@ -19,6 +19,9 @@
struct dev_archdata { struct dev_archdata {
#ifdef CONFIG_IOMMU_API #ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */ void *iommu; /* private IOMMU data */
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif #endif
bool dma_coherent; bool dma_coherent;
}; };

View File

@ -27,11 +27,8 @@
#define DMA_ERROR_CODE (~(dma_addr_t)0) #define DMA_ERROR_CODE (~(dma_addr_t)0)
extern const struct dma_map_ops dummy_dma_ops; extern const struct dma_map_ops dummy_dma_ops;
static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{ {
if (dev && dev->dma_ops)
return dev->dma_ops;
/* /*
* We expect no ISA devices, and all other DMA masters are expected to * We expect no ISA devices, and all other DMA masters are expected to
* have someone call arch_setup_dma_ops at device creation time. * have someone call arch_setup_dma_ops at device creation time.
@ -39,14 +36,6 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev)
return &dummy_dma_ops; return &dummy_dma_ops;
} }
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
if (xen_initial_domain())
return xen_dma_ops;
else
return __generic_dma_ops(NULL);
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent); const struct iommu_ops *iommu, bool coherent);
#define arch_setup_dma_ops arch_setup_dma_ops #define arch_setup_dma_ops arch_setup_dma_ops

View File

@ -977,4 +977,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->archdata.dma_coherent = coherent; dev->archdata.dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu); __iommu_setup_dma_ops(dev, dma_base, size, iommu);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
dev->dma_ops = xen_dma_ops;
}
#endif
} }

View File

@ -35,9 +35,6 @@ struct hypervisor_x86 {
/* Detection routine */ /* Detection routine */
uint32_t (*detect)(void); uint32_t (*detect)(void);
/* Adjust CPU feature bits (run once per CPU) */
void (*set_cpu_features)(struct cpuinfo_x86 *);
/* Platform setup (run once per boot) */ /* Platform setup (run once per boot) */
void (*init_platform)(void); void (*init_platform)(void);
@ -53,15 +50,14 @@ extern const struct hypervisor_x86 *x86_hyper;
/* Recognized hypervisors */ /* Recognized hypervisors */
extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm; extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void); extern void init_hypervisor_platform(void);
extern bool hypervisor_x2apic_available(void); extern bool hypervisor_x2apic_available(void);
extern void hypervisor_pin_vcpu(int cpu); extern void hypervisor_pin_vcpu(int cpu);
#else #else
static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
static inline void init_hypervisor_platform(void) { } static inline void init_hypervisor_platform(void) { }
static inline bool hypervisor_x2apic_available(void) { return false; } static inline bool hypervisor_x2apic_available(void) { return false; }
#endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* CONFIG_HYPERVISOR_GUEST */

View File

@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
/* No need for a barrier -- XCHG is a barrier on x86. */ /* No need for a barrier -- XCHG is a barrier on x86. */
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) #define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
extern int xen_have_vector_callback;
/*
* Events delivered via platform PCI interrupts are always
* routed to vcpu 0 and hence cannot be rebound.
*/
static inline bool xen_support_evtchn_rebind(void)
{
return (!xen_hvm_domain() || xen_have_vector_callback);
}
#endif /* _ASM_X86_XEN_EVENTS_H */ #endif /* _ASM_X86_XEN_EVENTS_H */

View File

@ -52,12 +52,30 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e); unsigned long pfn_e);
#ifdef CONFIG_XEN_PV
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops, struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_unmap_grant_ref *kunmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops,
struct page **pages, unsigned int count); struct page **pages, unsigned int count);
#else
static inline int
set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{
return 0;
}
static inline int
clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_unmap_grant_ref *kunmap_ops,
struct page **pages, unsigned int count)
{
return 0;
}
#endif
/* /*
* Helper functions to write or read unsigned long values to/from * Helper functions to write or read unsigned long values to/from
@ -73,6 +91,7 @@ static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
return __get_user(*val, (unsigned long __user *)addr); return __get_user(*val, (unsigned long __user *)addr);
} }
#ifdef CONFIG_XEN_PV
/* /*
* When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
* - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
@ -99,6 +118,12 @@ static inline unsigned long __pfn_to_mfn(unsigned long pfn)
return mfn; return mfn;
} }
#else
static inline unsigned long __pfn_to_mfn(unsigned long pfn)
{
return pfn;
}
#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline unsigned long pfn_to_mfn(unsigned long pfn)
{ {

View File

@ -1149,7 +1149,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
detect_ht(c); detect_ht(c);
#endif #endif
init_hypervisor(c);
x86_init_rdrand(c); x86_init_rdrand(c);
x86_init_cache_qos(c); x86_init_cache_qos(c);
setup_pku(c); setup_pku(c);

View File

@ -28,8 +28,11 @@
static const __initconst struct hypervisor_x86 * const hypervisors[] = static const __initconst struct hypervisor_x86 * const hypervisors[] =
{ {
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
&x86_hyper_xen, &x86_hyper_xen_pv,
#endif
#ifdef CONFIG_XEN_PVHVM
&x86_hyper_xen_hvm,
#endif #endif
&x86_hyper_vmware, &x86_hyper_vmware,
&x86_hyper_ms_hyperv, &x86_hyper_ms_hyperv,
@ -60,12 +63,6 @@ detect_hypervisor_vendor(void)
pr_info("Hypervisor detected: %s\n", x86_hyper->name); pr_info("Hypervisor detected: %s\n", x86_hyper->name);
} }
void init_hypervisor(struct cpuinfo_x86 *c)
{
if (x86_hyper && x86_hyper->set_cpu_features)
x86_hyper->set_cpu_features(c);
}
void __init init_hypervisor_platform(void) void __init init_hypervisor_platform(void)
{ {
@ -74,8 +71,6 @@ void __init init_hypervisor_platform(void)
if (!x86_hyper) if (!x86_hyper)
return; return;
init_hypervisor(&boot_cpu_data);
if (x86_hyper->init_platform) if (x86_hyper->init_platform)
x86_hyper->init_platform(); x86_hyper->init_platform();
} }

View File

@ -113,6 +113,24 @@ static void __init vmware_paravirt_ops_setup(void)
#define vmware_paravirt_ops_setup() do {} while (0) #define vmware_paravirt_ops_setup() do {} while (0)
#endif #endif
/*
* VMware hypervisor takes care of exporting a reliable TSC to the guest.
* Still, due to timing difference when running on virtual cpus, the TSC can
* be marked as unstable in some cases. For example, the TSC sync check at
* bootup can fail due to a marginal offset between vcpus' TSCs (though the
* TSCs do not drift from each other). Also, the ACPI PM timer clocksource
* is not suitable as a watchdog when running on a hypervisor because the
* kernel may miss a wrap of the counter if the vcpu is descheduled for a
* long time. To skip these checks at runtime we set these capability bits,
* so that the kernel could just trust the hypervisor with providing a
* reliable virtual TSC that is suitable for timekeeping.
*/
static void __init vmware_set_capabilities(void)
{
setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}
static void __init vmware_platform_setup(void) static void __init vmware_platform_setup(void)
{ {
uint32_t eax, ebx, ecx, edx; uint32_t eax, ebx, ecx, edx;
@ -152,6 +170,8 @@ static void __init vmware_platform_setup(void)
#ifdef CONFIG_X86_IO_APIC #ifdef CONFIG_X86_IO_APIC
no_timer_check = 1; no_timer_check = 1;
#endif #endif
vmware_set_capabilities();
} }
/* /*
@ -176,24 +196,6 @@ static uint32_t __init vmware_platform(void)
return 0; return 0;
} }
/*
* VMware hypervisor takes care of exporting a reliable TSC to the guest.
* Still, due to timing difference when running on virtual cpus, the TSC can
* be marked as unstable in some cases. For example, the TSC sync check at
* bootup can fail due to a marginal offset between vcpus' TSCs (though the
* TSCs do not drift from each other). Also, the ACPI PM timer clocksource
* is not suitable as a watchdog when running on a hypervisor because the
* kernel may miss a wrap of the counter if the vcpu is descheduled for a
* long time. To skip these checks at runtime we set these capability bits,
* so that the kernel could just trust the hypervisor with providing a
* reliable virtual TSC that is suitable for timekeeping.
*/
static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
}
/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void) static bool __init vmware_legacy_x2apic_available(void)
{ {
@ -206,7 +208,6 @@ static bool __init vmware_legacy_x2apic_available(void)
const __refconst struct hypervisor_x86 x86_hyper_vmware = { const __refconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware", .name = "VMware",
.detect = vmware_platform, .detect = vmware_platform,
.set_cpu_features = vmware_set_cpu_features,
.init_platform = vmware_platform_setup, .init_platform = vmware_platform_setup,
.x2apic_available = vmware_legacy_x2apic_available, .x2apic_available = vmware_legacy_x2apic_available,
}; };

View File

@ -446,7 +446,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss); __switch_to_xtra(prev_p, next_p, tss);
#ifdef CONFIG_XEN #ifdef CONFIG_XEN_PV
/* /*
* On Xen PV, IOPL bits in pt_regs->flags have no effect, and * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
* current_pt_regs()->flags may not match the current task's * current_pt_regs()->flags may not match the current task's

View File

@ -447,7 +447,7 @@ void __init xen_msi_init(void)
int __init pci_xen_hvm_init(void) int __init pci_xen_hvm_init(void)
{ {
if (!xen_feature(XENFEAT_hvm_pirqs)) if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
return 0; return 0;
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI

View File

@ -6,8 +6,6 @@ config XEN
bool "Xen guest support" bool "Xen guest support"
depends on PARAVIRT depends on PARAVIRT
select PARAVIRT_CLOCK select PARAVIRT_CLOCK
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
depends on X86_64 || (X86_32 && X86_PAE) depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC depends on X86_LOCAL_APIC && X86_TSC
help help
@ -15,18 +13,41 @@ config XEN
kernel to boot in a paravirtualized environment under the kernel to boot in a paravirtualized environment under the
Xen hypervisor. Xen hypervisor.
config XEN_DOM0 config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
Support running as a Xen PV guest.
config XEN_PV_SMP
def_bool y def_bool y
depends on XEN && PCI_XEN && SWIOTLB_XEN depends on XEN_PV && SMP
config XEN_DOM0
bool "Xen PV Dom0 support"
default y
depends on XEN_PV && PCI_XEN && SWIOTLB_XEN
depends on X86_IO_APIC && ACPI && PCI depends on X86_IO_APIC && ACPI && PCI
help
Support running as a Xen PV Dom0 guest.
config XEN_PVHVM config XEN_PVHVM
def_bool y bool "Xen PVHVM guest support"
default y
depends on XEN && PCI && X86_LOCAL_APIC depends on XEN && PCI && X86_LOCAL_APIC
help
Support running as a Xen PVHVM guest.
config XEN_PVHVM_SMP
def_bool y
depends on XEN_PVHVM && SMP
config XEN_512GB config XEN_512GB
bool "Limit Xen pv-domain memory to 512GB" bool "Limit Xen pv-domain memory to 512GB"
depends on XEN && X86_64 depends on XEN_PV && X86_64
default y default y
help help
Limit paravirtualized user domains to 512GB of RAM. Limit paravirtualized user domains to 512GB of RAM.

View File

@ -7,17 +7,23 @@ endif
# Make sure early boot has no stackprotector # Make sure early boot has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_enlighten.o := $(nostackp) CFLAGS_enlighten_pv.o := $(nostackp)
CFLAGS_mmu.o := $(nostackp) CFLAGS_mmu_pv.o := $(nostackp)
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-y := enlighten.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \ time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \ grant-table.o suspend.o platform-pci-unplug.o
p2m.o apic.o pmu.o
obj-$(CONFIG_XEN_PVHVM) += enlighten_hvm.o mmu_hvm.o suspend_hvm.o
obj-$(CONFIG_XEN_PV) += setup.o apic.o pmu.o suspend_pv.o \
p2m.o enlighten_pv.o mmu_pv.o
obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o
obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XEN_PV_SMP) += smp_pv.o
obj-$(CONFIG_XEN_PVHVM_SMP) += smp_hvm.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_XEN_DOM0) += vga.o

View File

@ -81,7 +81,7 @@ static const struct efi efi_xen __initconst = {
.update_capsule = xen_efi_update_capsule, .update_capsule = xen_efi_update_capsule,
.query_capsule_caps = xen_efi_query_capsule_caps, .query_capsule_caps = xen_efi_query_capsule_caps,
.get_next_high_mono_count = xen_efi_get_next_high_mono_count, .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
.reset_system = NULL, /* Functionality provided by Xen. */ .reset_system = xen_efi_reset_system,
.set_virtual_address_map = NULL, /* Not used under Xen. */ .set_virtual_address_map = NULL, /* Not used under Xen. */
.flags = 0 /* Initialized later. */ .flags = 0 /* Initialized later. */
}; };

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,214 @@
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
/*
 * Map the Xen shared_info page into the guest and point every online CPU's
 * xen_vcpu at its vcpu_info slot inside that page.
 *
 * The backing page is obtained from extend_brk() on the first call only;
 * later calls reuse the same page and simply repeat the mapping request.
 */
void __ref xen_hvm_init_shared_info(void)
{
	static struct shared_info *shared_info_page;
	struct xen_add_to_physmap xatp;
	int cpu;

	/* Allocate the backing page once and keep it for later calls. */
	if (!shared_info_page)
		shared_info_page = (struct shared_info *)
			extend_brk(PAGE_SIZE, PAGE_SIZE);

	/* Ask Xen to place shared_info at the frame backing our page. */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		BUG();

	HYPERVISOR_shared_info = shared_info_page;

	/*
	 * xen_vcpu refers to the vcpu_info struct in the shared_info page.
	 * It is used by the event channel upcall and by some pvclock related
	 * functions. The vcpu_info placement optimizations are not needed
	 * here because no pv_mmu or pv_irq op is used on HVM.
	 *
	 * At boot only vcpu 0 is online when this runs, but the function is
	 * also run at resume time, when several vcpus might be online.
	 */
	for_each_online_cpu(cpu) {
		/* vcpu ids at or above MAX_VIRT_CPUS are left alone (NULL). */
		if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
			per_cpu(xen_vcpu, cpu) =
				&HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
	}
}
/*
 * Early identification for a Xen HVM/PVH guest: print the hypervisor
 * version, mark the domain type as HVM, choose the pv_info name, register
 * the hypercall page (non-PVH only), read the Xen feature flags and store
 * the current CPU's Xen vcpu id.
 */
static void __init init_hvm_pv_info(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t base = xen_cpuid_base();
	int major, minor;

	/* EAX of leaf base+1: major in the upper 16 bits, minor in the lower. */
	eax = cpuid_eax(base + 1);
	major = eax >> 16;
	minor = eax & 0xffff;
	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);

	xen_domain_type = XEN_HVM_DOMAIN;

	if (!xen_pvh_domain()) {
		uint32_t msr;
		u64 pfn;

		pv_info.name = "Xen HVM";

		/* EBX of leaf base+2 names the MSR that takes the page address. */
		msr = cpuid_ebx(base + 2);
		pfn = __pa(hypercall_page);
		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
	} else {
		/* PVH set up hypercall page in xen_prepare_pvh(). */
		pv_info.name = "Xen PVH";
	}

	xen_setup_features();

	/* Use the vcpu id reported by leaf base+4 if present, else the CPU number. */
	cpuid(base + 4, &eax, &ebx, &ecx, &edx);
	if (!(eax & XEN_HVM_CPUID_VCPU_ID_PRESENT))
		this_cpu_write(xen_vcpu_id, smp_processor_id());
	else
		this_cpu_write(xen_vcpu_id, ebx);
}
#ifdef CONFIG_KEXEC_CORE
/*
 * machine_ops.shutdown replacement: run the native shutdown and, only when
 * a kexec is in progress, request a soft reset from Xen afterwards.
 */
static void xen_hvm_shutdown(void)
{
	native_machine_shutdown();

	if (!kexec_in_progress)
		return;

	xen_reboot(SHUTDOWN_soft_reset);
}

/*
 * machine_ops.crash_shutdown replacement: run the native crash shutdown and
 * then always request a soft reset from Xen.
 */
static void xen_hvm_crash_shutdown(struct pt_regs *regs)
{
	native_machine_crash_shutdown(regs);
	xen_reboot(SHUTDOWN_soft_reset);
}
#endif
/*
 * CPU hotplug "prepare" step for HVM guests.
 *
 * Records the Xen vcpu id for @cpu, sets up its vcpu info, optionally its
 * Xen timer, and finally its SMP interrupts.
 *
 * Returns 0 on success or the error reported by xen_smp_intr_init().
 */
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
	int rc;

	/*
	 * The CPU may have been offlined earlier with the offlining timing
	 * out in common_cpu_die(); release what was left behind then.
	 */
	if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
		xen_smp_intr_free(cpu);
		xen_uninit_lock_cpu(cpu);
	}

	/* Fall back to the Linux CPU number when no ACPI id is available. */
	if (cpu_acpi_id(cpu) == U32_MAX)
		per_cpu(xen_vcpu_id, cpu) = cpu;
	else
		per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);

	xen_vcpu_setup(cpu);

	if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
		xen_setup_timer(cpu);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
		     cpu, rc);

	return rc;
}
/*
 * CPU hotplug "dead" step for HVM guests: free the CPU's SMP interrupts and,
 * under the same condition used when setting it up, tear down its Xen timer.
 * Always returns 0.
 */
static int xen_cpu_dead_hvm(unsigned int cpu)
{
	xen_smp_intr_free(cpu);

	/* No timer to tear down unless both conditions hold. */
	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_safe_pvclock))
		return 0;

	xen_teardown_timer(cpu);
	return 0;
}
/*
 * Platform initialisation for a Xen HVM guest (used as the .init_platform
 * hook of x86_hyper_xen_hvm). Returns immediately when running as a PV
 * domain; otherwise runs the HVM setup steps below in this fixed order.
 */
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
return;
init_hvm_pv_info();
xen_hvm_init_shared_info();
xen_panic_handler_init();
/* Record whether Xen advertises the HVM callback vector feature. */
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
xen_hvm_smp_init();
/* Register the HVM CPU hotplug callbacks; a failure only triggers a warning. */
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
xen_unplug_emulated_devices();
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
/* Only PVH guests get the Xen emergency restart handler installed here. */
if (xen_pvh_domain())
machine_ops.emergency_restart = xen_emergency_restart;
#ifdef CONFIG_KEXEC_CORE
/* Use the Xen-aware shutdown handlers so kexec/crash issue a soft reset. */
machine_ops.shutdown = xen_hvm_shutdown;
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
#endif
}
static bool xen_nopv;
static __init int xen_parse_nopv(char *arg)
{
xen_nopv = true;
return 0;
}
early_param("xen_nopv", xen_parse_nopv);
/*
 * Report whether a Xen HVM guest needs the local APIC.
 *
 * Returns false when "xen_nopv" was given, when running as a PV domain, when
 * not running as an HVM domain at all, or when HVM pirqs are available
 * together with the vector callback. Returns true otherwise.
 */
bool xen_hvm_need_lapic(void)
{
	if (xen_nopv || xen_pv_domain() || !xen_hvm_domain())
		return false;

	return !(xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback);
}
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
/*
 * Detection hook for the Xen HVM hypervisor entry: yields 0 for PV domains
 * or when "xen_nopv" was given, and the Xen CPUID base otherwise.
 */
static uint32_t __init xen_platform_hvm(void)
{
	return (xen_pv_domain() || xen_nopv) ? 0 : xen_cpuid_base();
}
/* Hypervisor descriptor for Xen HVM guests. */
const struct hypervisor_x86 x86_hyper_xen_hvm = {
	.name			= "Xen HVM",
	.detect			= xen_platform_hvm,
	.init_platform		= xen_hvm_guest_init,
	.pin_vcpu		= xen_pin_vcpu,
	.x2apic_available	= xen_x2apic_para_available,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);

1513
arch/x86/xen/enlighten_pv.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
#include <linux/acpi.h>
#include <xen/hvc-console.h>
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/start_info.h>
/*
* PVH variables.
*
* xen_pvh and pvh_bootparams need to live in data segment since they
* are used after startup_{32|64}, which clear .bss, are invoked.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data")));
struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
/*
 * arch_setup hook for PVH guests: when no IO-APIC was found, select the
 * platform ACPI IRQ model so we do not fall back to the (default)
 * ACPI_IRQ_MODEL_PIC.
 */
static void xen_pvh_arch_setup(void)
{
	if (nr_ioapics != 0)
		return;

	acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
}
/*
 * Build pvh_bootparams for a PVH boot.
 *
 * Steps:
 *  - zero the structure;
 *  - fetch the memory map from Xen straight into the e820 table (a failing
 *    hypercall is fatal);
 *  - append a reserved entry for the ISA range if the table has room,
 *    otherwise print a warning;
 *  - copy the command line pointer and, when modules are present, the first
 *    module as the ramdisk from pvh_start_info;
 *  - fill in the boot protocol version and loader type.
 */
static void __init init_pvh_bootparams(void)
{
	struct xen_memory_map memmap;
	int rc;

	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));

	/* Let Xen write the memory map directly into the e820 table. */
	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
	if (rc) {
		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
		BUG();
	}
	pvh_bootparams.e820_entries = memmap.nr_entries;

	/* Mark the ISA range as reserved when there is still a free slot. */
	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
			ISA_START_ADDRESS;
		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].size =
			ISA_END_ADDRESS - ISA_START_ADDRESS;
		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
			E820_TYPE_RESERVED;
		pvh_bootparams.e820_entries++;
	} else
		/* This branch means the entry did NOT fit; say so. */
		xen_raw_printk("Warning: Can't fit ISA range into e820\n");

	pvh_bootparams.hdr.cmd_line_ptr =
		pvh_start_info.cmdline_paddr;

	/* The first module is always ramdisk. */
	if (pvh_start_info.nr_modules) {
		struct hvm_modlist_entry *modaddr =
			__va(pvh_start_info.modlist_paddr);
		pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
		pvh_bootparams.hdr.ramdisk_size = modaddr->size;
	}

	/*
	 * See Documentation/x86/boot.txt.
	 *
	 * Version 2.12 supports Xen entry point but we will use default x86/PC
	 * environment (i.e. hardware_subarch 0).
	 */
	pvh_bootparams.hdr.version = 0x212;
	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
}
/*
 * Early PVH entry preparation.
 *
 * Neither this routine nor anything it calls may rely on data living in
 * .bss, because that segment is cleared later on.
 */
void __init xen_prepare_pvh(void)
{
	u32 hypercall_msr;
	u64 hypercall_pa;

	/* Refuse to continue with start info that lacks the expected magic. */
	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
			       pvh_start_info.magic);
		BUG();
	}

	xen_pvh = true;

	/* Write the hypercall page address to the MSR reported by CPUID. */
	hypercall_msr = cpuid_ebx(xen_cpuid_base() + 2);
	hypercall_pa = __pa(hypercall_page);
	wrmsr_safe(hypercall_msr, (u32)hypercall_pa,
		   (u32)(hypercall_pa >> 32));

	init_pvh_bootparams();

	x86_init.oem.arch_setup = xen_pvh_arch_setup;
}

File diff suppressed because it is too large Load Diff

79
arch/x86/xen/mmu_hvm.c Normal file
View File

@ -0,0 +1,79 @@
#include <linux/types.h>
#include <linux/crash_dump.h>
#include <xen/interface/xen.h>
#include <xen/hvm.h>
#include "mmu.h"
#ifdef CONFIG_PROC_VMCORE
/*
 * Tell whether a pfn of a previous kernel is backed by RAM.
 *
 * Two users rely on this:
 * - the kdump kernel, where vmcore consults it to decide whether a pfn of
 *   the crashed kernel (possibly a ballooned page) may be accessed;
 * - the kexec kernel, which has to find out whether a pfn was ballooned by
 *   the previous kernel so it can be handled properly.
 *
 * Returns 0 if the pfn is not backed by a RAM page (the caller may then
 * treat the pfn specially), 1 if it is, and -ENXIO if the hypervisor query
 * itself fails.
 */
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
{
	struct xen_hvm_get_mem_type req = {
		.domid = DOMID_SELF,
		.pfn = pfn,
	};

	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &req))
		return -ENXIO;

	/* Only HVMMEM_mmio_dm counts as "not RAM"; every other type is RAM. */
	return req.mem_type != HVMMEM_mmio_dm;
}
#endif
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
struct xen_hvm_pagetable_dying a;
int rc;
a.domid = DOMID_SELF;
a.gpa = __pa(mm->pgd);
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
WARN_ON_ONCE(rc < 0);
}
static int is_pagetable_dying_supported(void)
{
struct xen_hvm_pagetable_dying a;
int rc = 0;
a.domid = DOMID_SELF;
a.gpa = 0x00;
rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
if (rc < 0) {
printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
return 0;
}
return 1;
}
/*
 * Install the HVM-specific MMU hooks: the exit_mmap notifier when the
 * hypervisor supports HVMOP_pagetable_dying, and, with CONFIG_PROC_VMCORE,
 * the old-memory "is this pfn RAM" callback.
 */
void __init xen_hvm_init_mmu_ops(void)
{
	/* Only hook exit_mmap when the probe hypercall succeeded. */
	if (is_pagetable_dying_supported())
		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;

#ifdef CONFIG_PROC_VMCORE
	register_oldmem_pfn_is_ram(xen_oldmem_pfn_is_ram);
#endif
}

2730
arch/x86/xen/mmu_pv.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,8 +4,13 @@
#include <xen/interface/xenpmu.h> #include <xen/interface/xenpmu.h>
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
#ifdef CONFIG_XEN_HAVE_VPMU
void xen_pmu_init(int cpu); void xen_pmu_init(int cpu);
void xen_pmu_finish(int cpu); void xen_pmu_finish(int cpu);
#else
static inline void xen_pmu_init(int cpu) {}
static inline void xen_pmu_finish(int cpu) {}
#endif
bool is_xen_pmu(int cpu); bool is_xen_pmu(int cpu);
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);

View File

@ -1,63 +1,21 @@
/*
* Xen SMP support
*
* This file implements the Xen versions of smp_ops. SMP under Xen is
* very straightforward. Bringing a CPU up is simply a matter of
* loading its initial context and setting it running.
*
* IPIs are handled through the Xen event mechanism.
*
* Because virtual CPUs can be scheduled onto any real CPU, there's no
* useful topology information for the kernel to make use of. As a
* result, all CPUs are treated as if they're single-core and
* single-threaded.
*/
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/irq_work.h> #include <linux/slab.h>
#include <linux/tick.h> #include <linux/cpumask.h>
#include <linux/nmi.h> #include <linux/percpu.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h> #include <xen/events.h>
#include <xen/hvc-console.h> #include <xen/hvc-console.h>
#include "xen-ops.h" #include "xen-ops.h"
#include "mmu.h"
#include "smp.h" #include "smp.h"
#include "pmu.h"
cpumask_var_t xen_cpu_initialized_map;
struct xen_common_irq {
int irq;
char *name;
};
static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
/* /*
* Reschedule call back. * Reschedule call back.
@ -70,42 +28,6 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static void cpu_bringup(void)
{
int cpu;
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
xen_enable_sysenter();
xen_enable_syscall();
}
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
xen_setup_cpu_clockevents();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
cpu_set_state_online(cpu); /* Implies full memory barrier. */
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
}
asmlinkage __visible void cpu_bringup_and_idle(void)
{
cpu_bringup();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void xen_smp_intr_free(unsigned int cpu) void xen_smp_intr_free(unsigned int cpu)
{ {
if (per_cpu(xen_resched_irq, cpu).irq >= 0) { if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
@ -133,27 +55,12 @@ void xen_smp_intr_free(unsigned int cpu)
kfree(per_cpu(xen_callfuncsingle_irq, cpu).name); kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
per_cpu(xen_callfuncsingle_irq, cpu).name = NULL; per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
} }
if (xen_hvm_domain()) }
return;
if (per_cpu(xen_irq_work, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
per_cpu(xen_irq_work, cpu).irq = -1;
kfree(per_cpu(xen_irq_work, cpu).name);
per_cpu(xen_irq_work, cpu).name = NULL;
}
if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
per_cpu(xen_pmu_irq, cpu).irq = -1;
kfree(per_cpu(xen_pmu_irq, cpu).name);
per_cpu(xen_pmu_irq, cpu).name = NULL;
}
};
int xen_smp_intr_init(unsigned int cpu) int xen_smp_intr_init(unsigned int cpu)
{ {
int rc; int rc;
char *resched_name, *callfunc_name, *debug_name, *pmu_name; char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@ -200,37 +107,6 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_callfuncsingle_irq, cpu).irq = rc; per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
/*
* The IRQ worker on PVHVM goes through the native path and uses the
* IPI mechanism.
*/
if (xen_hvm_domain())
return 0;
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
xen_irq_work_interrupt,
IRQF_PERCPU|IRQF_NOBALANCING,
callfunc_name,
NULL);
if (rc < 0)
goto fail;
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;
if (is_xen_pmu(cpu)) {
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
xen_pmu_irq_handler,
IRQF_PERCPU|IRQF_NOBALANCING,
pmu_name, NULL);
if (rc < 0)
goto fail;
per_cpu(xen_pmu_irq, cpu).irq = rc;
per_cpu(xen_pmu_irq, cpu).name = pmu_name;
}
return 0; return 0;
fail: fail:
@ -238,333 +114,7 @@ int xen_smp_intr_init(unsigned int cpu)
return rc; return rc;
} }
static void __init xen_fill_possible_map(void) void xen_smp_send_reschedule(int cpu)
{
int i, rc;
if (xen_initial_domain())
return;
for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
set_cpu_possible(i, true);
}
}
}
static void __init xen_filter_cpu_maps(void)
{
int i, rc;
unsigned int subtract = 0;
if (!xen_initial_domain())
return;
num_processors = 0;
disabled_cpus = 0;
for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
set_cpu_possible(i, true);
} else {
set_cpu_possible(i, false);
set_cpu_present(i, false);
subtract++;
}
}
#ifdef CONFIG_HOTPLUG_CPU
/* This is akin to using 'nr_cpus' on the Linux command line.
* Which is OK as when we use 'dom0_max_vcpus=X' we can only
* have up to X, while nr_cpu_ids is greater than X. This
* normally is not a problem, except when CPU hotplugging
* is involved and then there might be more than X CPUs
* in the guest - which will not work as there is no
* hypercall to expand the max number of VCPUs an already
* running guest has. So cap it up to X. */
if (subtract)
nr_cpu_ids = nr_cpu_ids - subtract;
#endif
}
static void __init xen_smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
if (xen_pv_domain()) {
if (!xen_feature(XENFEAT_writable_page_tables))
/* We've switched to the "real" per-cpu gdt, so make
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
#ifdef CONFIG_X86_32
/*
* Xen starts us with XEN_FLAT_RING1_DS, but linux code
* expects __USER_DS
*/
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
#endif
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
/*
* Setup vcpu_info for boot CPU.
*/
if (xen_hvm_domain())
xen_vcpu_setup(0);
/*
* The alternative logic (which patches the unlock/lock) runs before
* the smp bootup up code is activated. Hence we need to set this up
* the core kernel is being patched. Otherwise we will have only
* modules patched but not core code.
*/
xen_init_spinlocks();
}
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
unsigned int i;
if (skip_ioapic_setup) {
char *m = (max_cpus == 0) ?
"The nosmp parameter is incompatible with Xen; " \
"use Xen dom0_max_vcpus=1 parameter" :
"The noapic parameter is incompatible with Xen";
xen_raw_printk(m);
panic(m);
}
xen_init_lock_cpu(0);
smp_store_boot_cpu_info();
cpu_data(0).x86_max_cores = 1;
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
xen_pmu_init(0);
if (xen_smp_intr_init(0))
BUG();
if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
panic("could not allocate xen_cpu_initialized_map\n");
cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
/* Restrict the possible_map according to max_cpus. */
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
continue;
set_cpu_possible(cpu, false);
}
for_each_possible_cpu(cpu)
set_cpu_present(cpu, true);
}
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
struct vcpu_guest_context *ctxt;
struct desc_struct *gdt;
unsigned long gdt_mfn;
/* used to tell cpu_init() that it can proceed with initialization */
cpumask_set_cpu(cpu, cpu_callout_mask);
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt == NULL)
return -ENOMEM;
gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
xen_copy_trap_info(ctxt->trap_ctxt);
ctxt->ldt_ents = 0;
BUG_ON((unsigned long)gdt & ~PAGE_MASK);
gdt_mfn = arbitrary_virt_to_mfn(gdt);
make_lowmem_page_readonly(gdt);
make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
kfree(ctxt);
return 0;
}
static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
common_cpu_up(cpu, idle);
xen_setup_runstate_info(cpu);
/*
* PV VCPUs are always successfully taken down (see 'while' loop
* in xen_cpu_die()), so -EBUSY is an error.
*/
rc = cpu_check_up_prepare(cpu);
if (rc)
return rc;
/* make sure interrupts start blocked */
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
rc = cpu_initialize_context(cpu, idle);
if (rc)
return rc;
xen_pmu_init(cpu);
rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
BUG_ON(rc);
while (cpu_report_state(cpu) != CPU_ONLINE)
HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
return 0;
}
static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
unsigned int cpu = smp_processor_id();
if (cpu == 0)
return -EBUSY;
cpu_disable_common();
load_cr3(swapper_pg_dir);
return 0;
}
static void xen_cpu_die(unsigned int cpu)
{
while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
xen_vcpu_nr(cpu), NULL)) {
__set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/10);
}
if (common_cpu_die(cpu) == 0) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
xen_pmu_finish(cpu);
}
}
static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
cpu_bringup();
/*
* commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
* clears certain data that the cpu_idle loop (which called us
* and that we return from) expects. The only way to get that
* data back is to call:
*/
tick_nohz_idle_enter();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
return -ENOSYS;
}
static void xen_cpu_die(unsigned int cpu)
{
BUG();
}
static void xen_play_dead(void)
{
BUG();
}
#endif
static void stop_self(void *v)
{
int cpu = smp_processor_id();
/* make sure we're not pinning something down */
load_cr3(swapper_pg_dir);
/* should set up a minimal gdt */
set_cpu_online(cpu, false);
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
BUG();
}
static void xen_stop_other_cpus(int wait)
{
smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
{ {
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
} }
@ -578,7 +128,7 @@ static void __xen_send_IPI_mask(const struct cpumask *mask,
xen_send_IPI_one(cpu, vector); xen_send_IPI_one(cpu, vector);
} }
static void xen_smp_send_call_function_ipi(const struct cpumask *mask) void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{ {
int cpu; int cpu;
@ -593,7 +143,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
} }
} }
static void xen_smp_send_call_function_single_ipi(int cpu) void xen_smp_send_call_function_single_ipi(int cpu)
{ {
__xen_send_IPI_mask(cpumask_of(cpu), __xen_send_IPI_mask(cpumask_of(cpu),
XEN_CALL_FUNCTION_SINGLE_VECTOR); XEN_CALL_FUNCTION_SINGLE_VECTOR);
@ -698,54 +248,3 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
irq_enter();
irq_work_run();
inc_irq_stat(apic_irq_work_irqs);
irq_exit();
return IRQ_HANDLED;
}
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
.smp_cpus_done = xen_smp_cpus_done,
.cpu_up = xen_cpu_up,
.cpu_die = xen_cpu_die,
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
.stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
void __init xen_smp_init(void)
{
smp_ops = xen_smp_ops;
xen_fill_possible_map();
}
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
}
void __init xen_hvm_smp_init(void)
{
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
smp_ops.cpu_die = xen_cpu_die;
smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;
}

View File

@ -11,7 +11,17 @@ extern void xen_send_IPI_self(int vector);
extern int xen_smp_intr_init(unsigned int cpu); extern int xen_smp_intr_init(unsigned int cpu);
extern void xen_smp_intr_free(unsigned int cpu); extern void xen_smp_intr_free(unsigned int cpu);
int xen_smp_intr_init_pv(unsigned int cpu);
void xen_smp_intr_free_pv(unsigned int cpu);
void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
struct xen_common_irq {
int irq;
char *name;
};
#else /* CONFIG_SMP */ #else /* CONFIG_SMP */
static inline int xen_smp_intr_init(unsigned int cpu) static inline int xen_smp_intr_init(unsigned int cpu)
@ -19,6 +29,12 @@ static inline int xen_smp_intr_init(unsigned int cpu)
return 0; return 0;
} }
static inline void xen_smp_intr_free(unsigned int cpu) {} static inline void xen_smp_intr_free(unsigned int cpu) {}
static inline int xen_smp_intr_init_pv(unsigned int cpu)
{
return 0;
}
static inline void xen_smp_intr_free_pv(unsigned int cpu) {}
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#endif #endif

63
arch/x86/xen/smp_hvm.c Normal file
View File

@ -0,0 +1,63 @@
#include <asm/smp.h>
#include <xen/events.h>
#include "xen-ops.h"
#include "smp.h"
/*
 * smp_prepare_boot_cpu hook for Xen HVM guests (installed by
 * xen_hvm_smp_init()).  Runs the native preparation, then sets up the
 * vcpu_info for CPU 0 and initialises the Xen spinlocks.
 */
static void __init xen_hvm_smp_prepare_boot_cpu(void)
{
/* This hook must run on CPU 0. */
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
/*
 * Setup vcpu_info for boot CPU.
 */
xen_vcpu_setup(0);
/*
 * The alternative logic (which patches the unlock/lock) runs before
 * the smp bootup code is activated. Hence we need to set this up
 * before the core kernel is being patched. Otherwise we will have
 * only modules patched but not core code.
 */
xen_init_spinlocks();
}
/*
 * smp_prepare_cpus hook for Xen HVM guests: run the native preparation,
 * then set up the Xen IPI interrupts and the lock state for CPU 0.
 */
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
/* A failure to bind the boot CPU's IPIs is only warned about here. */
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * cpu_die hook for Xen HVM guests (CONFIG_HOTPLUG_CPU).  The per-cpu Xen
 * resources are released only when common_cpu_die() returns 0.
 */
static void xen_hvm_cpu_die(unsigned int cpu)
{
	if (common_cpu_die(cpu) != 0)
		return;

	xen_smp_intr_free(cpu);
	xen_uninit_lock_cpu(cpu);
	xen_teardown_timer(cpu);
}
#else
/* Without CONFIG_HOTPLUG_CPU this hook must never be reached. */
static void xen_hvm_cpu_die(unsigned int cpu)
{
BUG();
}
#endif
/*
 * Override selected smp_ops entries with the Xen HVM / common Xen
 * implementations.  When xen_have_vector_callback is not set, smp_ops is
 * left untouched.
 */
void __init xen_hvm_smp_init(void)
{
	if (!xen_have_vector_callback)
		return;

	/* CPU preparation and teardown hooks. */
	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
	smp_ops.cpu_die = xen_hvm_cpu_die;

	/* IPI senders shared with the PV code. */
	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
	smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
	smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
}

490
arch/x86/xen/smp_pv.c Normal file
View File

@ -0,0 +1,490 @@
/*
* Xen SMP support
*
* This file implements the Xen versions of smp_ops. SMP under Xen is
* very straightforward. Bringing a CPU up is simply a matter of
* loading its initial context and setting it running.
*
* IPIs are handled through the Xen event mechanism.
*
* Because virtual CPUs can be scheduled onto any real CPU, there's no
* useful topology information for the kernel to make use of. As a
* result, all CPUs are treated as if they're single-core and
* single-threaded.
*/
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/nmi.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "pmu.h"
cpumask_var_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
/*
 * Bring the calling CPU online.  Runs cpu_init(), optionally enables the
 * Xen sysenter/syscall paths, records the CPU info, sets up the clock
 * events, marks the CPU online and finally enables interrupts.
 *
 * Preemption is disabled here and is not re-enabled by this function.
 */
static void cpu_bringup(void)
{
int cpu;
cpu_init();
touch_softlockup_watchdog();
preempt_disable();
/* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
xen_enable_sysenter();
xen_enable_syscall();
}
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
/* Every CPU is reported as single-core (see the file header comment). */
cpu_data(cpu).x86_max_cores = 1;
set_cpu_sibling_map(cpu);
xen_setup_cpu_clockevents();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
cpu_set_state_online(cpu); /* Implies full memory barrier. */
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
}
/*
 * Entry point of a freshly started secondary vcpu: cpu_initialize_context()
 * stores this function's address as the initial instruction pointer.
 * Brings the CPU up and then enters the idle loop.
 */
asmlinkage __visible void cpu_bringup_and_idle(void)
{
cpu_bringup();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void xen_smp_intr_free_pv(unsigned int cpu)
{
if (per_cpu(xen_irq_work, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
per_cpu(xen_irq_work, cpu).irq = -1;
kfree(per_cpu(xen_irq_work, cpu).name);
per_cpu(xen_irq_work, cpu).name = NULL;
}
if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
per_cpu(xen_pmu_irq, cpu).irq = -1;
kfree(per_cpu(xen_pmu_irq, cpu).name);
per_cpu(xen_pmu_irq, cpu).name = NULL;
}
}
int xen_smp_intr_init_pv(unsigned int cpu)
{
int rc;
char *callfunc_name, *pmu_name;
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
xen_irq_work_interrupt,
IRQF_PERCPU|IRQF_NOBALANCING,
callfunc_name,
NULL);
if (rc < 0)
goto fail;
per_cpu(xen_irq_work, cpu).irq = rc;
per_cpu(xen_irq_work, cpu).name = callfunc_name;
if (is_xen_pmu(cpu)) {
pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
xen_pmu_irq_handler,
IRQF_PERCPU|IRQF_NOBALANCING,
pmu_name, NULL);
if (rc < 0)
goto fail;
per_cpu(xen_pmu_irq, cpu).irq = rc;
per_cpu(xen_pmu_irq, cpu).name = pmu_name;
}
return 0;
fail:
xen_smp_intr_free_pv(cpu);
return rc;
}
/*
 * For domains other than the initial domain: mark as possible every cpu id
 * below nr_cpu_ids for which VCPUOP_is_up does not return an error, and
 * count it in num_processors.
 */
static void __init xen_fill_possible_map(void)
{
	int vcpu;

	if (xen_initial_domain())
		return;

	for (vcpu = 0; vcpu < nr_cpu_ids; vcpu++) {
		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL) < 0)
			continue;

		num_processors++;
		set_cpu_possible(vcpu, true);
	}
}
/*
 * Initial domain only: recompute num_processors and the possible/present
 * maps from what VCPUOP_is_up reports for each cpu id, and (with
 * CONFIG_HOTPLUG_CPU) shrink nr_cpu_ids by the number of ids that failed
 * the query.
 */
static void __init xen_filter_cpu_maps(void)
{
	unsigned int unavailable = 0;
	int vcpu;

	if (!xen_initial_domain())
		return;

	num_processors = 0;
	disabled_cpus = 0;

	for (vcpu = 0; vcpu < nr_cpu_ids; vcpu++) {
		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL) >= 0) {
			num_processors++;
			set_cpu_possible(vcpu, true);
			continue;
		}

		set_cpu_possible(vcpu, false);
		set_cpu_present(vcpu, false);
		unavailable++;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * This is akin to using 'nr_cpus' on the Linux command line.
	 * Which is OK as when we use 'dom0_max_vcpus=X' we can only
	 * have up to X, while nr_cpu_ids is greater than X. This
	 * normally is not a problem, except when CPU hotplugging
	 * is involved and then there might be more than X CPUs
	 * in the guest - which will not work as there is no
	 * hypercall to expand the max number of VCPUs an already
	 * running guest has. So cap it up to X.
	 */
	if (unavailable)
		nr_cpu_ids = nr_cpu_ids - unavailable;
#endif
}
/*
 * smp_prepare_boot_cpu hook for Xen PV guests.  After the native
 * preparation it makes the initial GDT page writable again (unless
 * XENFEAT_writable_page_tables is set), reloads ds/es on 32-bit, filters
 * the cpu maps, sets up vcpu_info placement and initialises the Xen
 * spinlocks.
 */
static void __init xen_pv_smp_prepare_boot_cpu(void)
{
/* This hook must run on CPU 0. */
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
if (!xen_feature(XENFEAT_writable_page_tables))
/* We've switched to the "real" per-cpu gdt, so make
 * sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
#ifdef CONFIG_X86_32
/*
 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
 * expects __USER_DS
 */
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
#endif
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
/*
 * The alternative logic (which patches the unlock/lock) runs before
 * the smp bootup code is activated. Hence we need to set this up
 * before the core kernel is being patched. Otherwise we will have
 * only modules patched but not core code.
 */
xen_init_spinlocks();
}
/*
 * smp_prepare_cpus hook for Xen PV guests.
 *
 * Panics when skip_ioapic_setup is set (the message distinguishes
 * max_cpus == 0 from the other case).  Otherwise it prepares the boot
 * CPU (lock state, cpu info, sibling/core/llc masks, PMU, IPIs), allocates
 * xen_cpu_initialized_map with only CPU 0 set, trims the possible map down
 * to @max_cpus entries and marks every remaining possible CPU present.
 */
static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;
	unsigned int i;

	if (skip_ioapic_setup) {
		const char *m = (max_cpus == 0) ?
			"The nosmp parameter is incompatible with Xen; " \
			"use Xen dom0_max_vcpus=1 parameter" :
			"The noapic parameter is incompatible with Xen";

		/* Never hand a non-literal string to a printf-style format. */
		xen_raw_printk("%s", m);
		panic("%s", m);
	}
	xen_init_lock_cpu(0);

	smp_store_boot_cpu_info();
	/* Every CPU is reported as single-core (see the file header comment). */
	cpu_data(0).x86_max_cores = 1;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
	}
	set_cpu_sibling_map(0);

	xen_pmu_init(0);

	/* The boot CPU cannot run without both its common and PV interrupts. */
	if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))
		BUG();

	if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
		panic("could not allocate xen_cpu_initialized_map\n");

	cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		/* Find the highest-numbered CPU that is still possible. */
		for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
			continue;
		set_cpu_possible(cpu, false);
	}

	for_each_possible_cpu(cpu)
		set_cpu_present(cpu, true);
}
/*
 * Build the initial vcpu_guest_context for @cpu and hand it to the
 * hypervisor with VCPUOP_initialise.
 *
 * @cpu:  CPU to initialise
 * @idle: task whose thread.sp0 provides the initial kernel stack pointer
 *
 * Returns 0 on success, or immediately if @cpu was already set in
 * xen_cpu_initialized_map; -ENOMEM if the context cannot be allocated.
 * BUGs if the hypercall fails.
 */
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
struct vcpu_guest_context *ctxt;
struct desc_struct *gdt;
unsigned long gdt_mfn;
/* used to tell cpu_init() that it can proceed with initialization */
cpumask_set_cpu(cpu, cpu_callout_mask);
/* Nothing more to do if this CPU has been initialised before. */
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt == NULL)
return -ENOMEM;
gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* The new vcpu starts executing at cpu_bringup_and_idle(). */
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
xen_copy_trap_info(ctxt->trap_ctxt);
ctxt->ldt_ents = 0;
/* The GDT must start on a page boundary. */
BUG_ON((unsigned long)gdt & ~PAGE_MASK);
gdt_mfn = arbitrary_virt_to_mfn(gdt);
/* Write-protect the GDT through both of the addresses used here. */
make_lowmem_page_readonly(gdt);
make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->event_callback_eip =
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
/* Initial stack pointer: leave room for one struct pt_regs below sp0. */
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
/* The context buffer is freed once the hypercall has returned. */
kfree(ctxt);
return 0;
}
/*
 * cpu_up hook for Xen PV guests: prepare @cpu, initialise its vcpu context,
 * start it with VCPUOP_up and wait until it reports CPU_ONLINE.
 *
 * Returns 0 on success, or the non-zero result of cpu_check_up_prepare()
 * or cpu_initialize_context().  BUGs if VCPUOP_up fails.
 */
static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
common_cpu_up(cpu, idle);
xen_setup_runstate_info(cpu);
/*
 * PV VCPUs are always successfully taken down (see 'while' loop
 * in xen_pv_cpu_die()), so -EBUSY is an error.
 */
rc = cpu_check_up_prepare(cpu);
if (rc)
return rc;
/* make sure interrupts start blocked */
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
rc = cpu_initialize_context(cpu, idle);
if (rc)
return rc;
xen_pmu_init(cpu);
rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
BUG_ON(rc);
/* Yield to the hypervisor until the new CPU has marked itself online. */
while (cpu_report_state(cpu) != CPU_ONLINE)
HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
return 0;
}
/* smp_cpus_done hook for Xen PV guests: intentionally does nothing. */
static void xen_pv_smp_cpus_done(unsigned int max_cpus)
{
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * cpu_disable hook for Xen PV guests (CONFIG_HOTPLUG_CPU).
 *
 * Returns -EBUSY when called on CPU 0, which is never disabled; otherwise
 * runs the common disable path, switches to swapper_pg_dir and returns 0.
 */
static int xen_pv_cpu_disable(void)
{
	if (smp_processor_id() == 0)
		return -EBUSY;

	cpu_disable_common();

	load_cr3(swapper_pg_dir);
	return 0;
}
/*
 * cpu_die hook for Xen PV guests (CONFIG_HOTPLUG_CPU).  Polls every HZ/10
 * until VCPUOP_is_up returns 0 for @cpu, then, if common_cpu_die() returns
 * 0, releases the per-cpu interrupts, lock state, timer and PMU.
 */
static void xen_pv_cpu_die(unsigned int cpu)
{
	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ/10);
	}

	if (common_cpu_die(cpu) != 0)
		return;

	xen_smp_intr_free(cpu);
	xen_uninit_lock_cpu(cpu);
	xen_teardown_timer(cpu);
	xen_pmu_finish(cpu);
}
/*
 * play_dead hook for Xen PV guests: take the calling vcpu down with
 * VCPUOP_down.  The code after the hypercall brings the CPU back up and
 * re-enters the idle loop.
 */
static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
/* NOTE(review): presumably reached when the vcpu is started again - confirm. */
cpu_bringup();
/*
 * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
 * clears certain data that the cpu_idle loop (which called us
 * and that we return from) expects. The only way to get that
 * data back is to call:
 */
tick_nohz_idle_enter();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#else /* !CONFIG_HOTPLUG_CPU */
/* Without CONFIG_HOTPLUG_CPU, disabling a CPU is not supported. */
static int xen_pv_cpu_disable(void)
{
return -ENOSYS;
}
/* Without CONFIG_HOTPLUG_CPU this hook must never be reached. */
static void xen_pv_cpu_die(unsigned int cpu)
{
BUG();
}
/* Without CONFIG_HOTPLUG_CPU this hook must never be reached. */
static void xen_pv_play_dead(void)
{
BUG();
}
#endif
/*
 * Cross-call callback used by xen_pv_stop_other_cpus(): take the calling
 * vcpu offline and down.  @v is unused.  The VCPUOP_down hypercall is not
 * expected to return; BUG() if it does.
 */
static void stop_self(void *v)
{
int cpu = smp_processor_id();
/* make sure we're not pinning something down */
load_cr3(swapper_pg_dir);
/* should set up a minimal gdt */
set_cpu_online(cpu, false);
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
BUG();
}
/*
 * stop_other_cpus hook for Xen PV guests: run stop_self() via
 * smp_call_function(), passing @wait through unchanged.
 */
static void xen_pv_stop_other_cpus(int wait)
{
smp_call_function(stop_self, NULL, wait);
}
/*
 * Handler bound to XEN_IRQ_WORK_VECTOR by xen_smp_intr_init_pv(): runs the
 * pending irq work inside irq_enter()/irq_exit() and bumps the
 * apic_irq_work_irqs statistic.  Always returns IRQ_HANDLED.
 */
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
irq_enter();
irq_work_run();
inc_irq_stat(apic_irq_work_irqs);
irq_exit();
return IRQ_HANDLED;
}
/*
 * smp_ops table for Xen PV guests; copied over the global smp_ops by
 * xen_smp_init().  The xen_pv_* hooks are defined in this file; the three
 * IPI senders are the non-PV-specific xen_smp_* functions.
 */
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
.smp_cpus_done = xen_pv_smp_cpus_done,
.cpu_up = xen_pv_cpu_up,
.cpu_die = xen_pv_cpu_die,
.cpu_disable = xen_pv_cpu_disable,
.play_dead = xen_pv_play_dead,
.stop_other_cpus = xen_pv_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
/*
 * Install the Xen PV smp_ops and populate the possible-cpu map from the
 * hypervisor (xen_fill_possible_map() is a no-op in the initial domain).
 */
void __init xen_smp_init(void)
{
smp_ops = xen_smp_ops;
xen_fill_possible_map();
}

View File

@ -14,60 +14,6 @@
#include "mmu.h" #include "mmu.h"
#include "pmu.h" #include "pmu.h"
static void xen_pv_pre_suspend(void)
{
xen_mm_pin_all();
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
mfn_to_pfn(xen_start_info->console.domU.mfn);
BUG_ON(!irqs_disabled());
HYPERVISOR_shared_info = &xen_dummy_shared_info;
if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
__pte_ma(0), 0))
BUG();
}
static void xen_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
if (!suspend_cancelled)
xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
for_each_online_cpu(cpu) {
xen_setup_runstate_info(cpu);
}
}
#endif
}
static void xen_pv_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
xen_setup_shared_info();
if (suspend_cancelled) {
xen_start_info->store_mfn =
pfn_to_mfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
pfn_to_mfn(xen_start_info->console.domU.mfn);
} else {
#ifdef CONFIG_SMP
BUG_ON(xen_cpu_initialized_map == NULL);
cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
#endif
xen_vcpu_restore();
}
xen_mm_unpin_all();
}
void xen_arch_pre_suspend(void) void xen_arch_pre_suspend(void)
{ {
if (xen_pv_domain()) if (xen_pv_domain())

View File

@ -0,0 +1,22 @@
#include <linux/types.h>
#include <xen/xen.h>
#include <xen/features.h>
#include <xen/interface/features.h>
#include "xen-ops.h"
void xen_hvm_post_suspend(int suspend_cancelled)
{
int cpu;
if (!suspend_cancelled)
xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
for_each_online_cpu(cpu) {
xen_setup_runstate_info(cpu);
}
}
}

46
arch/x86/xen/suspend_pv.c Normal file
View File

@ -0,0 +1,46 @@
#include <linux/types.h>
#include <asm/fixmap.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include "xen-ops.h"
/*
 * Pre-suspend handling for Xen PV guests.  Pins all page tables, converts
 * the xenstore and console frame numbers in xen_start_info from mfn to
 * pfn, points HYPERVISOR_shared_info at the dummy shared info and clears
 * the FIX_PARAVIRT_BOOTMAP mapping.  Must be called with interrupts
 * disabled; BUGs if the mapping update fails.
 */
void xen_pv_pre_suspend(void)
{
xen_mm_pin_all();
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
mfn_to_pfn(xen_start_info->console.domU.mfn);
BUG_ON(!irqs_disabled());
HYPERVISOR_shared_info = &xen_dummy_shared_info;
/* Install an empty pte at the FIX_PARAVIRT_BOOTMAP fixmap address. */
if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
__pte_ma(0), 0))
BUG();
}
/*
 * Post-suspend handling for Xen PV guests.
 *
 * @suspend_cancelled: non-zero if the suspend was cancelled.
 *
 * Rebuilds the mfn list and shared info first.  After a real suspend the
 * initialised-cpu map is reset to the online mask (SMP only) and the vcpus
 * are restored; after a cancelled one the xenstore/console frame numbers
 * in xen_start_info are converted back from pfn to mfn.  Finally all page
 * tables are unpinned.
 */
void xen_pv_post_suspend(int suspend_cancelled)
{
	xen_build_mfn_list_list();

	xen_setup_shared_info();

	if (!suspend_cancelled) {
#ifdef CONFIG_SMP
		BUG_ON(xen_cpu_initialized_map == NULL);
		cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
#endif
		xen_vcpu_restore();
	} else {
		xen_start_info->store_mfn =
			pfn_to_mfn(xen_start_info->store_mfn);
		xen_start_info->console.domU.mfn =
			pfn_to_mfn(xen_start_info->console.domU.mfn);
	}

	xen_mm_unpin_all();
}

View File

@ -436,6 +436,14 @@ static void xen_hvm_setup_cpu_clockevents(void)
void __init xen_hvm_init_time_ops(void) void __init xen_hvm_init_time_ops(void)
{ {
/*
* vector callback is needed otherwise we cannot receive interrupts
* on cpu > 0 and at this point we don't know how many cpus are
* available.
*/
if (!xen_have_vector_callback)
return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
printk(KERN_INFO "Xen doesn't support pvclock on HVM," printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
"disable pv timer\n"); "disable pv timer\n");

View File

@ -16,6 +16,7 @@
#include <xen/interface/xen-mca.h> #include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h> #include <asm/xen/interface.h>
#ifdef CONFIG_XEN_PV
__INIT __INIT
ENTRY(startup_xen) ENTRY(startup_xen)
cld cld
@ -34,6 +35,7 @@ ENTRY(startup_xen)
jmp xen_start_kernel jmp xen_start_kernel
__FINIT __FINIT
#endif
.pushsection .text .pushsection .text
.balign PAGE_SIZE .balign PAGE_SIZE
@ -58,7 +60,9 @@ ENTRY(hypercall_page)
/* Map the p2m table to a 512GB-aligned user address. */ /* Map the p2m table to a 512GB-aligned user address. */
ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE) ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE)
#endif #endif
#ifdef CONFIG_XEN_PV
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
.ascii "!writable_page_tables|pae_pgdir_above_4gb") .ascii "!writable_page_tables|pae_pgdir_above_4gb")

View File

@ -76,6 +76,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu); bool xen_vcpu_stolen(int vcpu);
extern int xen_have_vcpu_info_placement;
void xen_vcpu_setup(int cpu); void xen_vcpu_setup(int cpu);
void xen_setup_vcpu_info_placement(void); void xen_setup_vcpu_info_placement(void);
@ -146,4 +148,24 @@ __visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void); extern int xen_panic_handler_init(void);
int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
int (*cpu_dead_cb)(unsigned int));
void xen_pin_vcpu(int cpu);
void xen_emergency_restart(void);
/*
 * PV suspend hooks: real prototypes when CONFIG_XEN_PV is enabled,
 * otherwise empty inline stubs so callers need no #ifdef of their own.
 */
#ifdef CONFIG_XEN_PV
void xen_pv_pre_suspend(void);
void xen_pv_post_suspend(int suspend_cancelled);
#else
static inline void xen_pv_pre_suspend(void) {}
static inline void xen_pv_post_suspend(int suspend_cancelled) {}
#endif
/*
 * HVM post-suspend hook: real prototype when CONFIG_XEN_PVHVM is enabled,
 * otherwise an empty inline stub.
 */
#ifdef CONFIG_XEN_PVHVM
void xen_hvm_post_suspend(int suspend_cancelled);
#else
static inline void xen_hvm_post_suspend(int suspend_cancelled) {}
#endif
#endif /* XEN_OPS_H */ #endif /* XEN_OPS_H */

View File

@ -434,7 +434,7 @@ static int map_data_for_request(struct vscsifrnt_info *info,
if (seg_grants) { if (seg_grants) {
page = virt_to_page(seg); page = virt_to_page(seg);
off = (unsigned long)seg & ~PAGE_MASK; off = offset_in_page(seg);
len = sizeof(struct scsiif_request_segment) * data_grants; len = sizeof(struct scsiif_request_segment) * data_grants;
while (len > 0) { while (len > 0) {
bytes = min_t(unsigned int, len, PAGE_SIZE - off); bytes = min_t(unsigned int, len, PAGE_SIZE - off);

View File

@ -709,6 +709,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
} }
EXPORT_SYMBOL(free_xenballooned_pages); EXPORT_SYMBOL(free_xenballooned_pages);
#ifdef CONFIG_XEN_PV
static void __init balloon_add_region(unsigned long start_pfn, static void __init balloon_add_region(unsigned long start_pfn,
unsigned long pages) unsigned long pages)
{ {
@ -732,19 +733,22 @@ static void __init balloon_add_region(unsigned long start_pfn,
balloon_stats.total_pages += extra_pfn_end - start_pfn; balloon_stats.total_pages += extra_pfn_end - start_pfn;
} }
#endif
static int __init balloon_init(void) static int __init balloon_init(void)
{ {
int i;
if (!xen_domain()) if (!xen_domain())
return -ENODEV; return -ENODEV;
pr_info("Initialising balloon driver\n"); pr_info("Initialising balloon driver\n");
#ifdef CONFIG_XEN_PV
balloon_stats.current_pages = xen_pv_domain() balloon_stats.current_pages = xen_pv_domain()
? min(xen_start_info->nr_pages - xen_released_pages, max_pfn) ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
: get_num_physpages(); : get_num_physpages();
#else
balloon_stats.current_pages = get_num_physpages();
#endif
balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0; balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0; balloon_stats.balloon_high = 0;
@ -761,14 +765,20 @@ static int __init balloon_init(void)
register_sysctl_table(xen_root); register_sysctl_table(xen_root);
#endif #endif
/* #ifdef CONFIG_XEN_PV
* Initialize the balloon with pages from the extra memory {
* regions (see arch/x86/xen/setup.c). int i;
*/
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) /*
if (xen_extra_mem[i].n_pfns) * Initialize the balloon with pages from the extra memory
balloon_add_region(xen_extra_mem[i].start_pfn, * regions (see arch/x86/xen/setup.c).
xen_extra_mem[i].n_pfns); */
for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
if (xen_extra_mem[i].n_pfns)
balloon_add_region(xen_extra_mem[i].start_pfn,
xen_extra_mem[i].n_pfns);
}
#endif
return 0; return 0;
} }

View File

@ -26,6 +26,7 @@
#include <xen/interface/xen.h> #include <xen/interface/xen.h>
#include <xen/interface/platform.h> #include <xen/interface/platform.h>
#include <xen/xen.h> #include <xen/xen.h>
#include <xen/xen-ops.h>
#include <asm/page.h> #include <asm/page.h>
@ -263,3 +264,20 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
return efi_data(op).status; return efi_data(op).status;
} }
EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps);
/*
 * xen_efi_reset_system - EFI reset_system callback backed by xen_reboot().
 * @reset_type: one of EFI_RESET_COLD, EFI_RESET_WARM or EFI_RESET_SHUTDOWN.
 * @status:     unused.
 * @data_size:  unused.
 * @data:       unused.
 *
 * Cold and warm resets are both issued as SHUTDOWN_reboot; a shutdown
 * request is issued as SHUTDOWN_poweroff.  Any other reset type is
 * treated as a bug.
 */
void xen_efi_reset_system(int reset_type, efi_status_t status,
			  unsigned long data_size, efi_char16_t *data)
{
	if (reset_type == EFI_RESET_COLD || reset_type == EFI_RESET_WARM)
		xen_reboot(SHUTDOWN_reboot);
	else if (reset_type == EFI_RESET_SHUTDOWN)
		xen_reboot(SHUTDOWN_poweroff);
	else
		BUG();
}
EXPORT_SYMBOL_GPL(xen_efi_reset_system);

View File

@ -1312,6 +1312,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
if (!VALID_EVTCHN(evtchn)) if (!VALID_EVTCHN(evtchn))
return -1; return -1;
if (!xen_support_evtchn_rebind())
return -1;
/* Send future instances of this interrupt to other vcpu. */ /* Send future instances of this interrupt to other vcpu. */
bind_vcpu.port = evtchn; bind_vcpu.port = evtchn;
bind_vcpu.vcpu = xen_vcpu_nr(tcpu); bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
@ -1646,14 +1649,20 @@ void xen_callback_vector(void)
int rc; int rc;
uint64_t callback_via; uint64_t callback_via;
callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); if (xen_have_vector_callback) {
rc = xen_set_callback_via(callback_via); callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
BUG_ON(rc); rc = xen_set_callback_via(callback_via);
pr_info("Xen HVM callback vector for event delivery is enabled\n"); if (rc) {
/* in the restore case the vector has already been allocated */ pr_err("Request for Xen HVM callback vector failed\n");
if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) xen_have_vector_callback = 0;
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, return;
xen_hvm_callback_vector); }
pr_info("Xen HVM callback vector for event delivery is enabled\n");
/* in the restore case the vector has already been allocated */
if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
xen_hvm_callback_vector);
}
} }
#else #else
void xen_callback_vector(void) {} void xen_callback_vector(void) {}

View File

@ -90,8 +90,10 @@ static int xen_allocate_irq(struct pci_dev *pdev)
static int platform_pci_resume(struct pci_dev *pdev) static int platform_pci_resume(struct pci_dev *pdev)
{ {
int err; int err;
if (!xen_pv_domain())
if (xen_have_vector_callback)
return 0; return 0;
err = xen_set_callback_via(callback_via); err = xen_set_callback_via(callback_via);
if (err) { if (err) {
dev_err(&pdev->dev, "platform_pci_resume failure!\n"); dev_err(&pdev->dev, "platform_pci_resume failure!\n");
@ -137,15 +139,7 @@ static int platform_pci_probe(struct pci_dev *pdev,
platform_mmio = mmio_addr; platform_mmio = mmio_addr;
platform_mmiolen = mmio_len; platform_mmiolen = mmio_len;
if (!xen_have_vector_callback) {
/*
* Xen HVM guests always use the vector callback mechanism.
* L1 Dom0 in a nested Xen environment is a PV guest inside in an
* HVM environment. It needs the platform-pci driver to get
* notifications from L0 Xen, but it cannot use the vector callback
* as it is not exported by L1 Xen.
*/
if (xen_pv_domain()) {
ret = xen_allocate_irq(pdev); ret = xen_allocate_irq(pdev);
if (ret) { if (ret) {
dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);

View File

@ -693,8 +693,8 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
unsigned long attrs) unsigned long attrs)
{ {
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
if (__generic_dma_ops(dev)->mmap) if (xen_get_dma_ops(dev)->mmap)
return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr,
dma_addr, size, attrs); dma_addr, size, attrs);
#endif #endif
return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
@ -711,7 +711,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
unsigned long attrs) unsigned long attrs)
{ {
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
if (__generic_dma_ops(dev)->get_sgtable) { if (xen_get_dma_ops(dev)->get_sgtable) {
#if 0 #if 0
/* /*
* This check verifies that the page belongs to the current domain and * This check verifies that the page belongs to the current domain and
@ -721,7 +721,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
BUG_ON (!page_is_ram(bfn)); BUG_ON (!page_is_ram(bfn));
#endif #endif
return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
handle, size, attrs); handle, size, attrs);
} }
#endif #endif

View File

@ -2,8 +2,16 @@
#define _ASM_ARM_XEN_PAGE_COHERENT_H #define _ASM_ARM_XEN_PAGE_COHERENT_H
#include <asm/page.h> #include <asm/page.h>
#include <asm/dma-mapping.h>
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
/*
 * Return the DMA ops to use for @dev: the per-device ops stored in
 * dev->archdata.dev_dma_ops when @dev is non-NULL and has them set,
 * otherwise whatever get_arch_dma_ops(NULL) returns.
 */
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
	if (!dev || !dev->archdata.dev_dma_ops)
		return get_arch_dma_ops(NULL);

	return dev->archdata.dev_dma_ops;
}
void __xen_dma_map_page(struct device *hwdev, struct page *page, void __xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size, dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs); enum dma_data_direction dir, unsigned long attrs);
@ -19,13 +27,13 @@ void __xen_dma_sync_single_for_device(struct device *hwdev,
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{ {
return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
} }
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{ {
__generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
} }
static inline void xen_dma_map_page(struct device *hwdev, struct page *page, static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
@ -49,7 +57,7 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
* specific function. * specific function.
*/ */
if (local) if (local)
__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
} }
@ -67,8 +75,8 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
* specific function. * specific function.
*/ */
if (pfn_valid(pfn)) { if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->unmap_page) if (xen_get_dma_ops(hwdev)->unmap_page)
__generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else } else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs); __xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
} }
@ -78,8 +86,8 @@ static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
{ {
unsigned long pfn = PFN_DOWN(handle); unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) { if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->sync_single_for_cpu) if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
__generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else } else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
} }
@ -89,8 +97,8 @@ static inline void xen_dma_sync_single_for_device(struct device *hwdev,
{ {
unsigned long pfn = PFN_DOWN(handle); unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) { if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->sync_single_for_device) if (xen_get_dma_ops(hwdev)->sync_single_for_device)
__generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else } else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir); __xen_dma_sync_single_for_device(hwdev, handle, size, dir);
} }

View File

@ -0,0 +1,36 @@
/*
* 9pfs.h -- Xen 9PFS transport
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (C) 2017 Stefano Stabellini <stefano@aporeto.com>
*/
#ifndef __XEN_PUBLIC_IO_9PFS_H__
#define __XEN_PUBLIC_IO_9PFS_H__
#include "xen/interface/io/ring.h"
/*
* See docs/misc/9pfs.markdown in xen.git for the full specification:
* https://xenbits.xen.org/docs/unstable/misc/9pfs.html
*/
DEFINE_XEN_FLEX_RING_AND_INTF(xen_9pfs);
#endif

View File

@ -0,0 +1,854 @@
/******************************************************************************
* displif.h
*
* Unified display device I/O interface for Xen guest OSes.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (C) 2016-2017 EPAM Systems Inc.
*
* Authors: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
* Oleksandr Grytsov <oleksandr_grytsov@epam.com>
*/
#ifndef __XEN_PUBLIC_IO_DISPLIF_H__
#define __XEN_PUBLIC_IO_DISPLIF_H__
#include "ring.h"
#include "../grant_table.h"
/*
******************************************************************************
* Protocol version
******************************************************************************
*/
#define XENDISPL_PROTOCOL_VERSION "1"
/*
******************************************************************************
* Main features provided by the protocol
******************************************************************************
* This protocol aims to provide a unified protocol which fits more
* sophisticated use-cases than a framebuffer device can handle. At the
* moment basic functionality is supported with the intention to be extended:
* o multiple dynamically allocated/destroyed framebuffers
* o buffers of arbitrary sizes
* o buffer allocation at either back or front end
* o better configuration options including multiple display support
*
* Note: existing fbif can be used together with displif running at the
* same time, e.g. on Linux one provides framebuffer and another DRM/KMS
*
* Note: display resolution (XenStore's "resolution" property) defines
* visible area of the virtual display. At the same time resolution of
* the display and frame buffers may differ: buffers can be smaller, equal
* or bigger than the visible area. This is to enable use-cases, where backend
* may do some post-processing of the display and frame buffers supplied,
* e.g. those buffers can be just a part of the final composition.
*
******************************************************************************
* Direction of improvements
******************************************************************************
* Future extensions to the existing protocol may include:
* o display/connector cloning
* o allocation of objects other than display buffers
* o plane/overlay support
* o scaling support
* o rotation support
*
******************************************************************************
* Feature and Parameter Negotiation
******************************************************************************
*
* Front->back notifications: when enqueuing a new request, sending a
* notification can be made conditional on xendispl_req (i.e., the generic
* hold-off mechanism provided by the ring macros). Backends must set
* xendispl_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
*
* Back->front notifications: when enqueuing a new response, sending a
* notification can be made conditional on xendispl_resp (i.e., the generic
* hold-off mechanism provided by the ring macros). Frontends must set
* xendispl_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
*
* The two halves of a para-virtual display driver utilize nodes within
* XenStore to communicate capabilities and to negotiate operating parameters.
* This section enumerates these nodes which reside in the respective front and
* backend portions of XenStore, following the XenBus convention.
*
* All data in XenStore is stored as strings. Nodes specifying numeric
* values are encoded in decimal. Integer value ranges listed below are
* expressed as fixed sized integer types capable of storing the conversion
* of a properly formatted node string, without loss of information.
*
******************************************************************************
* Example configuration
******************************************************************************
*
* Note: depending on the use-case backend can expose more display connectors
* than the underlying HW physically has by employing SW graphics compositors
*
* This is an example of backend and frontend configuration:
*
*--------------------------------- Backend -----------------------------------
*
* /local/domain/0/backend/vdispl/1/0/frontend-id = "1"
* /local/domain/0/backend/vdispl/1/0/frontend = "/local/domain/1/device/vdispl/0"
* /local/domain/0/backend/vdispl/1/0/state = "4"
* /local/domain/0/backend/vdispl/1/0/versions = "1,2"
*
*--------------------------------- Frontend ----------------------------------
*
* /local/domain/1/device/vdispl/0/backend-id = "0"
* /local/domain/1/device/vdispl/0/backend = "/local/domain/0/backend/vdispl/1/0"
* /local/domain/1/device/vdispl/0/state = "4"
* /local/domain/1/device/vdispl/0/version = "1"
* /local/domain/1/device/vdispl/0/be-alloc = "1"
*
*-------------------------- Connector 0 configuration ------------------------
*
* /local/domain/1/device/vdispl/0/0/resolution = "1920x1080"
* /local/domain/1/device/vdispl/0/0/req-ring-ref = "2832"
* /local/domain/1/device/vdispl/0/0/req-event-channel = "15"
* /local/domain/1/device/vdispl/0/0/evt-ring-ref = "387"
* /local/domain/1/device/vdispl/0/0/evt-event-channel = "16"
*
*-------------------------- Connector 1 configuration ------------------------
*
* /local/domain/1/device/vdispl/0/1/resolution = "800x600"
* /local/domain/1/device/vdispl/0/1/req-ring-ref = "2833"
* /local/domain/1/device/vdispl/0/1/req-event-channel = "17"
* /local/domain/1/device/vdispl/0/1/evt-ring-ref = "388"
* /local/domain/1/device/vdispl/0/1/evt-event-channel = "18"
*
******************************************************************************
* Backend XenBus Nodes
******************************************************************************
*
*----------------------------- Protocol version ------------------------------
*
* versions
* Values: <string>
*
* List of XENDISPL_LIST_SEPARATOR separated protocol versions supported
* by the backend. For example "1,2,3".
*
******************************************************************************
* Frontend XenBus Nodes
******************************************************************************
*
*-------------------------------- Addressing ---------------------------------
*
* dom-id
* Values: <uint16_t>
*
* Domain identifier.
*
* dev-id
* Values: <uint16_t>
*
* Device identifier.
*
* conn-idx
* Values: <uint8_t>
*
* Zero-based contiguous index of the connector.
* /local/domain/<dom-id>/device/vdispl/<dev-id>/<conn-idx>/...
*
*----------------------------- Protocol version ------------------------------
*
* version
* Values: <string>
*
* Protocol version, chosen among the ones supported by the backend.
*
*------------------------- Backend buffer allocation -------------------------
*
* be-alloc
* Values: "0", "1"
*
* If value is set to "1", then backend can be a buffer provider/allocator
* for this domain during XENDISPL_OP_DBUF_CREATE operation (see below
* for negotiation).
* If value is not "1" or omitted frontend must allocate buffers itself.
*
*----------------------------- Connector settings ----------------------------
*
* resolution
* Values: <width, uint32_t>x<height, uint32_t>
*
* Width and height of the connector in pixels separated by
* XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the
* display.
*
*------------------ Connector Request Transport Parameters -------------------
*
* This communication path is used to deliver requests from frontend to backend
* and get the corresponding responses from backend to frontend,
* set up per connector.
*
* req-event-channel
* Values: <uint32_t>
*
* The identifier of the Xen connector's control event channel
* used to signal activity in the ring buffer.
*
* req-ring-ref
* Values: <uint32_t>
*
* The Xen grant reference granting permission for the backend to map
* a sole page of connector's control ring buffer.
*
*------------------- Connector Event Transport Parameters --------------------
*
* This communication path is used to deliver asynchronous events from backend
* to frontend, set up per connector.
*
* evt-event-channel
* Values: <uint32_t>
*
* The identifier of the Xen connector's event channel
* used to signal activity in the ring buffer.
*
* evt-ring-ref
* Values: <uint32_t>
*
* The Xen grant reference granting permission for the backend to map
* a sole page of connector's event ring buffer.
*/
/*
******************************************************************************
* STATE DIAGRAMS
******************************************************************************
*
* Tool stack creates front and back state nodes with initial state
* XenbusStateInitialising.
* Tool stack creates and sets up frontend display configuration
* nodes per domain.
*
*-------------------------------- Normal flow --------------------------------
*
* Front Back
* ================================= =====================================
* XenbusStateInitialising XenbusStateInitialising
* o Query backend device identification
* data.
* o Open and validate backend device.
* |
* |
* V
* XenbusStateInitWait
*
* o Query frontend configuration
* o Allocate and initialize
* event channels per configured
* connector.
* o Publish transport parameters
* that will be in effect during
* this connection.
* |
* |
* V
* XenbusStateInitialised
*
* o Query frontend transport parameters.
* o Connect to the event channels.
* |
* |
* V
* XenbusStateConnected
*
* o Create and initialize OS
* virtual display connectors
* as per configuration.
* |
* |
* V
* XenbusStateConnected
*
* XenbusStateUnknown
* XenbusStateClosed
* XenbusStateClosing
* o Remove virtual display device
* o Remove event channels
* |
* |
* V
* XenbusStateClosed
*
*------------------------------- Recovery flow -------------------------------
*
* In case of frontend unrecoverable errors backend handles that as
* if frontend goes into the XenbusStateClosed state.
*
* In case of backend unrecoverable errors frontend tries removing
* the virtualized device. If this is possible at the moment of error,
* then frontend goes into the XenbusStateInitialising state and is ready for
* new connection with backend. If the virtualized device is still in use and
* cannot be removed, then frontend goes into the XenbusStateReconfiguring state
* until either the virtualized device is removed or backend initiates a new
* connection. On the virtualized device removal frontend goes into the
* XenbusStateInitialising state.
*
* Note on XenbusStateReconfiguring state of the frontend: if backend has
* unrecoverable errors then frontend cannot send requests to the backend
* and thus cannot provide functionality of the virtualized device anymore.
* After backend is back to normal the virtualized device may still hold some
* state: configuration in use, allocated buffers, client application state etc.
* In most cases, this will require frontend to implement complex recovery
* reconnect logic. Instead, by going into XenbusStateReconfiguring state,
* frontend will make sure no new clients of the virtualized device are
* accepted, allow existing client(s) to exit gracefully by signaling error
* state etc.
* Once all the clients are gone frontend can reinitialize the virtualized
* device and get into XenbusStateInitialising state again signaling the
* backend that a new connection can be made.
*
* There are multiple conditions possible under which frontend will go from
* XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS
* specific. For example:
* 1. The underlying OS framework may provide callbacks to signal that the last
* client of the virtualized device has gone and the device can be removed
* 2. Frontend can schedule a deferred work (timer/tasklet/workqueue)
* to periodically check if this is the right time to re-try removal of
* the virtualized device.
* 3. By any other means.
*
******************************************************************************
* REQUEST CODES
******************************************************************************
* Request codes [0; 15] are reserved and must not be used
*/
#define XENDISPL_OP_DBUF_CREATE 0x10
#define XENDISPL_OP_DBUF_DESTROY 0x11
#define XENDISPL_OP_FB_ATTACH 0x12
#define XENDISPL_OP_FB_DETACH 0x13
#define XENDISPL_OP_SET_CONFIG 0x14
#define XENDISPL_OP_PG_FLIP 0x15
/*
******************************************************************************
* EVENT CODES
******************************************************************************
*/
#define XENDISPL_EVT_PG_FLIP 0x00
/*
******************************************************************************
* XENSTORE FIELD AND PATH NAME STRINGS, HELPERS
******************************************************************************
*/
/* Driver name string. */
#define XENDISPL_DRIVER_NAME			"vdispl"

/* Separator strings. */
#define XENDISPL_LIST_SEPARATOR			","
#define XENDISPL_RESOLUTION_SEPARATOR		"x"

/* XenStore field name strings. */
#define XENDISPL_FIELD_BE_VERSIONS		"versions"
#define XENDISPL_FIELD_FE_VERSION		"version"
#define XENDISPL_FIELD_REQ_RING_REF		"req-ring-ref"
#define XENDISPL_FIELD_REQ_CHANNEL		"req-event-channel"
#define XENDISPL_FIELD_EVT_RING_REF		"evt-ring-ref"
#define XENDISPL_FIELD_EVT_CHANNEL		"evt-event-channel"
#define XENDISPL_FIELD_RESOLUTION		"resolution"
#define XENDISPL_FIELD_BE_ALLOC			"be-alloc"
/*
******************************************************************************
* STATUS RETURN CODES
******************************************************************************
*
* Status return code is zero on success and -XEN_EXX on failure.
*
******************************************************************************
* Assumptions
******************************************************************************
* o usage of grant reference 0 as invalid grant reference:
* grant reference 0 is valid, but never exposed to a PV driver,
* because of the fact it is already in use/reserved by the PV console.
* o all references in this document to page sizes must be treated
* as pages of size XEN_PAGE_SIZE unless otherwise noted.
*
******************************************************************************
* Description of the protocol between frontend and backend driver
******************************************************************************
*
* The two halves of a Para-virtual display driver communicate with
* each other using shared pages and event channels.
* Shared page contains a ring with request/response packets.
*
* All reserved fields in the structures below must be 0.
* Display buffer's cookie of value 0 is treated as invalid.
* Framebuffer's cookie of value 0 is treated as invalid.
*
* For all request/response/event packets that use cookies:
* dbuf_cookie - uint64_t, unique to guest domain value used by the backend
* to map remote display buffer to its local one
* fb_cookie - uint64_t, unique to guest domain value used by the backend
* to map remote framebuffer to its local one
*
*---------------------------------- Requests ---------------------------------
*
* All requests/responses, which are not connector specific, must be sent over
* control ring of the connector which has the index value of 0:
* /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
*
* All request packets have the same length (64 octets)
* All request packets have common header:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* id - uint16_t, private guest value, echoed in response
* operation - uint8_t, operation code, XENDISPL_OP_???
*
* Request dbuf creation - request creation of a display buffer.
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id |_OP_DBUF_CREATE | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | width | 20
* +----------------+----------------+----------------+----------------+
* | height | 24
* +----------------+----------------+----------------+----------------+
* | bpp | 28
* +----------------+----------------+----------------+----------------+
* | buffer_sz | 32
* +----------------+----------------+----------------+----------------+
* | flags | 36
* +----------------+----------------+----------------+----------------+
* | gref_directory | 40
* +----------------+----------------+----------------+----------------+
* | reserved | 44
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Must be sent over control ring of the connector which has the index
* value of 0:
* /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
* All unused bits in flags field must be set to 0.
*
* An attempt to create multiple display buffers with the same dbuf_cookie is
* an error. dbuf_cookie can be re-used after destroying the corresponding
* display buffer.
*
* Width and height of the display buffers can be smaller, equal or bigger
* than the connector's resolution. Depth/pixel format of the individual
* buffers can differ as well.
*
* width - uint32_t, width in pixels
* height - uint32_t, height in pixels
* bpp - uint32_t, bits per pixel
* buffer_sz - uint32_t, buffer size to be allocated, octets
* flags - uint32_t, flags of the operation
* o XENDISPL_DBUF_FLG_REQ_ALLOC - if set, then backend is requested
* to allocate the buffer with the parameters provided in this request.
* Page directory is handled as follows:
* Frontend on request:
* o allocates pages for the directory (gref_directory,
* gref_dir_next_page(s))
* o grants permissions for the pages of the directory to the backend
* o sets gref_dir_next_page fields
* Backend on response:
* o grants permissions for the pages of the buffer allocated to
* the frontend
* o fills in page directory with grant references
* (gref[] in struct xendispl_page_directory)
* gref_directory - grant_ref_t, a reference to the first shared page
* describing shared buffer references. At least one page exists. If shared
* buffer size (buffer_sz) exceeds what can be addressed by this single page,
* then reference to the next page must be supplied (see gref_dir_next_page
* below)
*/
/* flags bit: backend is requested to allocate the buffer itself. */
#define XENDISPL_DBUF_FLG_REQ_ALLOC	(1 << 0)

/* Operation-specific part of an XENDISPL_OP_DBUF_CREATE request. */
struct xendispl_dbuf_create_req {
	uint64_t dbuf_cookie;		/* cookie identifying the display buffer; 0 is invalid */
	uint32_t width;			/* width in pixels */
	uint32_t height;		/* height in pixels */
	uint32_t bpp;			/* bits per pixel */
	uint32_t buffer_sz;		/* buffer size to be allocated, octets */
	uint32_t flags;			/* XENDISPL_DBUF_FLG_???; unused bits must be 0 */
	grant_ref_t gref_directory;	/* first shared page of the page directory */
};
/*
* Shared page for XENDISPL_OP_DBUF_CREATE buffer descriptor (gref_directory in
* the request) employs a list of pages, describing all pages of the shared
* data buffer:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | gref_dir_next_page | 4
* +----------------+----------------+----------------+----------------+
* | gref[0] | 8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | gref[i] | i*4+8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | gref[N - 1] | N*4+4
* +----------------+----------------+----------------+----------------+
*
* gref_dir_next_page - grant_ref_t, reference to the next page describing
* page directory. Must be 0 if there are no more pages in the list.
* gref[i] - grant_ref_t, reference to a shared page of the buffer
* allocated at XENDISPL_OP_DBUF_CREATE
*
* Number of grant_ref_t entries in the whole page directory is not
* passed, but instead can be calculated as:
* num_grefs_total = (XENDISPL_OP_DBUF_CREATE.buffer_sz + XEN_PAGE_SIZE - 1) /
* XEN_PAGE_SIZE
*/
/* Layout of one shared page of the display buffer's page directory. */
struct xendispl_page_directory {
	/* Reference to the next directory page; 0 if there are no more pages. */
	grant_ref_t gref_dir_next_page;
	/* Grant references of the buffer's pages; declared with one element. */
	grant_ref_t gref[1]; /* Variable length */
};
/*
* Request dbuf destruction - destroy a previously allocated display buffer:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id |_OP_DBUF_DESTROY| reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Must be sent over control ring of the connector which has the index
* value of 0:
* /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
*/
/* Operation-specific part of an XENDISPL_OP_DBUF_DESTROY request. */
struct xendispl_dbuf_destroy_req {
	uint64_t dbuf_cookie;	/* cookie of the display buffer to destroy */
};
/*
* Request framebuffer attachment - request attachment of a framebuffer to
* previously created display buffer.
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _OP_FB_ATTACH | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | dbuf_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | fb_cookie low 32-bit | 20
* +----------------+----------------+----------------+----------------+
* | fb_cookie high 32-bit | 24
* +----------------+----------------+----------------+----------------+
* | width | 28
* +----------------+----------------+----------------+----------------+
* | height | 32
* +----------------+----------------+----------------+----------------+
* | pixel_format | 36
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Must be sent over control ring of the connector which has the index
* value of 0:
* /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
* Width and height can be smaller, equal or bigger than the connector's
* resolution.
*
* An attempt to create multiple frame buffers with the same fb_cookie is
* an error. fb_cookie can be re-used after destroying the corresponding
* frame buffer.
*
* width - uint32_t, width in pixels
* height - uint32_t, height in pixels
* pixel_format - uint32_t, pixel format of the framebuffer, FOURCC code
*/
/* Operation-specific part of an XENDISPL_OP_FB_ATTACH request. */
struct xendispl_fb_attach_req {
	uint64_t dbuf_cookie;	/* display buffer the framebuffer is attached to */
	uint64_t fb_cookie;	/* cookie identifying the framebuffer; 0 is invalid */
	uint32_t width;		/* width in pixels */
	uint32_t height;	/* height in pixels */
	uint32_t pixel_format;	/* pixel format of the framebuffer, FOURCC code */
};
/*
* Request framebuffer detach - detach a previously
* attached framebuffer from the display buffer in request:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _OP_FB_DETACH | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | fb_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | fb_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Must be sent over control ring of the connector which has the index
* value of 0:
* /local/domain/<dom-id>/device/vdispl/<dev-id>/0/req-ring-ref
*/
/* Operation-specific part of an XENDISPL_OP_FB_DETACH request. */
struct xendispl_fb_detach_req {
	uint64_t fb_cookie;	/* cookie of the framebuffer to detach */
};
/*
* Request configuration set/reset - request to set or reset
* the configuration/mode of the display:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _OP_SET_CONFIG | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | fb_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | fb_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | x | 20
* +----------------+----------------+----------------+----------------+
* | y | 24
* +----------------+----------------+----------------+----------------+
* | width | 28
* +----------------+----------------+----------------+----------------+
* | height | 32
* +----------------+----------------+----------------+----------------+
* | bpp | 36
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* Pass all zeros to reset, otherwise command is treated as
* configuration set.
* Framebuffer's cookie defines which framebuffer/dbuf must be
* displayed while enabling display (applying configuration).
* x, y, width and height are bound by the connector's resolution and must not
* exceed it.
*
* x - uint32_t, starting position in pixels by X axis
* y - uint32_t, starting position in pixels by Y axis
* width - uint32_t, width in pixels
* height - uint32_t, height in pixels
* bpp - uint32_t, bits per pixel
*/
/*
 * Operation-specific part of an XENDISPL_OP_SET_CONFIG request.
 * All zeros requests a reset; anything else is a configuration set.
 */
struct xendispl_set_config_req {
	uint64_t fb_cookie;	/* framebuffer to display when the configuration is applied */
	uint32_t x;		/* starting position in pixels by X axis */
	uint32_t y;		/* starting position in pixels by Y axis */
	uint32_t width;		/* width in pixels */
	uint32_t height;	/* height in pixels */
	uint32_t bpp;		/* bits per pixel */
};
/*
* Request page flip - request to flip a page identified by the framebuffer
* cookie:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _OP_PG_FLIP | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | fb_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | fb_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*/
/* Operation-specific part of an XENDISPL_OP_PG_FLIP request. */
struct xendispl_page_flip_req {
	uint64_t fb_cookie;	/* cookie of the framebuffer to flip to */
};
/*
*---------------------------------- Responses --------------------------------
*
* All response packets have the same length (64 octets)
*
* All response packets have common header:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | status | 8
* +----------------+----------------+----------------+----------------+
* | reserved | 12
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*
* id - uint16_t, private guest value, echoed from request
* status - int32_t, response status, zero on success and -XEN_EXX on failure
*
*----------------------------------- Events ----------------------------------
*
* Events are sent via a shared page allocated by the front and propagated by
* evt-event-channel/evt-ring-ref XenStore entries
* All event packets have the same length (64 octets)
* All event packets have common header:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | type | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
*
* id - uint16_t, event id, may be used by front
* type - uint8_t, type of the event
*
*
* Page flip complete event - event from back to front on page flip completed:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | _EVT_PG_FLIP | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | fb_cookie low 32-bit | 12
* +----------------+----------------+----------------+----------------+
* | fb_cookie high 32-bit | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 64
* +----------------+----------------+----------------+----------------+
*/
/* Event-specific part of an XENDISPL_EVT_PG_FLIP (page flip complete) event. */
struct xendispl_pg_flip_evt {
	uint64_t fb_cookie;	/* framebuffer cookie carried by the event */
};
/*
 * Request packet: an 8-octet common header followed by a 56-octet
 * operation-specific payload.
 */
struct xendispl_req {
	uint16_t id;		/* private guest value, echoed in response */
	uint8_t operation;	/* operation code, XENDISPL_OP_??? */
	uint8_t reserved[5];	/* reserved fields must be 0 */
	union {
		struct xendispl_dbuf_create_req dbuf_create;
		struct xendispl_dbuf_destroy_req dbuf_destroy;
		struct xendispl_fb_attach_req fb_attach;
		struct xendispl_fb_detach_req fb_detach;
		struct xendispl_set_config_req set_config;
		struct xendispl_page_flip_req pg_flip;
		/* Sizes the payload area at 56 octets. */
		uint8_t reserved[56];
	} op;
};
/* Response packet; 64 octets, the same length as a request. */
struct xendispl_resp {
	uint16_t id;		/* private guest value, echoed from request */
	uint8_t operation;	/* operation code field of the response */
	uint8_t reserved;	/* reserved fields must be 0 */
	int32_t status;		/* zero on success and -XEN_EXX on failure */
	uint8_t reserved1[56];	/* fills the rest of the 64-octet packet */
};
/*
 * Event packet: an 8-octet common header followed by a 56-octet
 * event-specific payload.
 */
struct xendispl_evt {
	uint16_t id;		/* event id, may be used by front */
	uint8_t type;		/* type of the event */
	uint8_t reserved[5];	/* reserved fields must be 0 */
	union {
		struct xendispl_pg_flip_evt pg_flip;
		/* Sizes the payload area at 56 octets. */
		uint8_t reserved[56];
	} op;
};
/*
 * Instantiate the ring types named "xen_displif", with struct xendispl_req as
 * the request type and struct xendispl_resp as the response type.
 * DEFINE_RING_TYPES itself is defined outside this file.
 */
DEFINE_RING_TYPES(xen_displif, struct xendispl_req, struct xendispl_resp);
/*
******************************************************************************
* Back to front events delivery
******************************************************************************
* In order to deliver asynchronous events from back to front a shared page is
* allocated by front and its granted reference propagated to back via
* XenStore entries (evt-ring-ref/evt-event-channel).
* This page has a common header used by both front and back to synchronize
* access and control event's ring buffer, while back being a producer of the
* events and front being a consumer. The rest of the page after the header
* is used for event packets.
*
* Upon reception of an event(s) front may confirm its reception
* for either each event, group of events or none.
*/
/* Header at the start of the shared event page; event packets follow it. */
struct xendispl_event_page {
	uint32_t in_cons;	/* consumer index of the event ring */
	uint32_t in_prod;	/* producer index of the event ring */
	uint8_t reserved[56];	/* brings the header to 64 octets */
};
/* Size of the event page; equal to XEN_PAGE_SIZE. */
#define XENDISPL_EVENT_PAGE_SIZE	XEN_PAGE_SIZE
/* Offset of the event ring within the page: right after the page header. */
#define XENDISPL_IN_RING_OFFS		(sizeof(struct xendispl_event_page))
/* Octets left in the page for event packets. */
#define XENDISPL_IN_RING_SIZE \
	(XENDISPL_EVENT_PAGE_SIZE - XENDISPL_IN_RING_OFFS)
/* Number of whole event packets that fit into the ring. */
#define XENDISPL_IN_RING_LEN \
	(XENDISPL_IN_RING_SIZE / sizeof(struct xendispl_evt))
/* Pointer to the first event packet of the ring in the given page. */
#define XENDISPL_IN_RING(page) \
	((struct xendispl_evt *)((char *)(page) + XENDISPL_IN_RING_OFFS))
/* Event slot for index idx; the index wraps modulo the ring length. */
#define XENDISPL_IN_RING_REF(page, idx) \
	(XENDISPL_IN_RING(page)[(idx) % XENDISPL_IN_RING_LEN])
#endif /* __XEN_PUBLIC_IO_DISPLIF_H__ */

View File

@ -26,43 +26,432 @@
#ifndef __XEN_PUBLIC_IO_KBDIF_H__ #ifndef __XEN_PUBLIC_IO_KBDIF_H__
#define __XEN_PUBLIC_IO_KBDIF_H__ #define __XEN_PUBLIC_IO_KBDIF_H__
/* In events (backend -> frontend) */ /*
*****************************************************************************
* Feature and Parameter Negotiation
*****************************************************************************
*
* The two halves of a para-virtual driver utilize nodes within
* XenStore to communicate capabilities and to negotiate operating parameters.
* This section enumerates these nodes which reside in the respective front and
* backend portions of XenStore, following XenBus convention.
*
* All data in XenStore is stored as strings. Nodes specifying numeric
* values are encoded in decimal. Integer value ranges listed below are
* expressed as fixed sized integer types capable of storing the conversion
* of a properly formatted node string, without loss of information.
*
*****************************************************************************
* Backend XenBus Nodes
*****************************************************************************
*
*---------------------------- Features supported ----------------------------
*
* Capable backend advertises supported features by publishing
* corresponding entries in XenStore and puts 1 as the value of the entry.
* If a feature is not supported then 0 must be set or feature entry omitted.
*
* feature-abs-pointer
* Values: <uint>
*
* Backends, which support reporting of absolute coordinates for pointer
* device should set this to 1.
*
* feature-multi-touch
* Values: <uint>
*
* Backends, which support reporting of multi-touch events
* should set this to 1.
*
*------------------------- Pointer Device Parameters ------------------------
*
* width
* Values: <uint>
*
* Maximum X coordinate (width) to be used by the frontend
* while reporting input events, pixels, [0; UINT32_MAX].
*
* height
* Values: <uint>
*
* Maximum Y coordinate (height) to be used by the frontend
* while reporting input events, pixels, [0; UINT32_MAX].
*
*****************************************************************************
* Frontend XenBus Nodes
*****************************************************************************
*
*------------------------------ Feature request -----------------------------
*
* Capable frontend requests features from backend via setting corresponding
* entries to 1 in XenStore. Requests for features not advertised as supported
* by the backend have no effect.
*
* request-abs-pointer
* Values: <uint>
*
* Request backend to report absolute pointer coordinates
* (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
*
* request-multi-touch
* Values: <uint>
*
* Request backend to report multi-touch events.
*
*----------------------- Request Transport Parameters -----------------------
*
* event-channel
* Values: <uint>
*
* The identifier of the Xen event channel used to signal activity
* in the ring buffer.
*
* page-gref
* Values: <uint>
*
* The Xen grant reference granting permission for the backend to map
* a sole page in a single page sized event ring buffer.
*
* page-ref
* Values: <uint>
*
* OBSOLETE, not recommended for use.
* PFN of the shared page.
*
*----------------------- Multi-touch Device Parameters -----------------------
*
* multi-touch-num-contacts
* Values: <uint>
*
* Number of simultaneous touches reported.
*
* multi-touch-width
* Values: <uint>
*
* Width of the touch area to be used by the frontend
* while reporting input events, pixels, [0; UINT32_MAX].
*
* multi-touch-height
* Values: <uint>
*
* Height of the touch area to be used by the frontend
* while reporting input events, pixels, [0; UINT32_MAX].
*/
/* /*
* EVENT CODES.
*/
#define XENKBD_TYPE_MOTION 1
#define XENKBD_TYPE_RESERVED 2
#define XENKBD_TYPE_KEY 3
#define XENKBD_TYPE_POS 4
#define XENKBD_TYPE_MTOUCH 5
/* Multi-touch event sub-codes */
#define XENKBD_MT_EV_DOWN 0
#define XENKBD_MT_EV_UP 1
#define XENKBD_MT_EV_MOTION 2
#define XENKBD_MT_EV_SYN 3
#define XENKBD_MT_EV_SHAPE 4
#define XENKBD_MT_EV_ORIENT 5
/*
* CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
*/
#define XENKBD_DRIVER_NAME "vkbd"
#define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer"
#define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch"
#define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer"
#define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch"
#define XENKBD_FIELD_RING_GREF "page-gref"
#define XENKBD_FIELD_EVT_CHANNEL "event-channel"
#define XENKBD_FIELD_WIDTH "width"
#define XENKBD_FIELD_HEIGHT "height"
#define XENKBD_FIELD_MT_WIDTH "multi-touch-width"
#define XENKBD_FIELD_MT_HEIGHT "multi-touch-height"
#define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts"
/* OBSOLETE, not recommended for use */
#define XENKBD_FIELD_RING_REF "page-ref"
/*
*****************************************************************************
* Description of the protocol between frontend and backend driver.
*****************************************************************************
*
* The two halves of a Para-virtual driver communicate with
* each other using a shared page and an event channel.
* Shared page contains a ring with event structures.
*
* All reserved fields in the structures below must be 0.
*
*****************************************************************************
* Backend to frontend events
*****************************************************************************
*
* Frontends should ignore unknown in events. * Frontends should ignore unknown in events.
* All event packets have the same length (40 octets)
* All event packets have common header:
*
* 0 octet
* +-----------------+
* | type |
* +-----------------+
* type - uint8_t, event code, XENKBD_TYPE_???
*
*
* Pointer relative movement event
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MOTION | reserved | 4
* +----------------+----------------+----------------+----------------+
* | rel_x | 8
* +----------------+----------------+----------------+----------------+
* | rel_y | 12
* +----------------+----------------+----------------+----------------+
* | rel_z | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* rel_x - int32_t, relative X motion
* rel_y - int32_t, relative Y motion
* rel_z - int32_t, relative Z motion (wheel)
*/ */
/* Pointer movement event */
#define XENKBD_TYPE_MOTION 1
/* Event type 2 currently not used */
/* Key event (includes pointer buttons) */
#define XENKBD_TYPE_KEY 3
/*
* Pointer position event
* Capable backend sets feature-abs-pointer in xenstore.
* Frontend requests ot instead of XENKBD_TYPE_MOTION by setting
* request-abs-update in xenstore.
*/
#define XENKBD_TYPE_POS 4
struct xenkbd_motion { struct xenkbd_motion {
uint8_t type; /* XENKBD_TYPE_MOTION */ uint8_t type;
int32_t rel_x; /* relative X motion */ int32_t rel_x;
int32_t rel_y; /* relative Y motion */ int32_t rel_y;
int32_t rel_z; /* relative Z motion (wheel) */ int32_t rel_z;
}; };
/*
* Key event (includes pointer buttons)
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_KEY | pressed | reserved | 4
* +----------------+----------------+----------------+----------------+
* | keycode | 8
* +----------------+----------------+----------------+----------------+
* | reserved | 12
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* pressed - uint8_t, 1 if pressed; 0 otherwise
* keycode - uint32_t, KEY_* from linux/input.h
*/
struct xenkbd_key { struct xenkbd_key {
uint8_t type; /* XENKBD_TYPE_KEY */ uint8_t type;
uint8_t pressed; /* 1 if pressed; 0 otherwise */ uint8_t pressed;
uint32_t keycode; /* KEY_* from linux/input.h */ uint32_t keycode;
}; };
/*
* Pointer absolute position event
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_POS | reserved | 4
* +----------------+----------------+----------------+----------------+
* | abs_x | 8
* +----------------+----------------+----------------+----------------+
* | abs_y | 12
* +----------------+----------------+----------------+----------------+
* | rel_z | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* abs_x - int32_t, absolute X position (in FB pixels)
* abs_y - int32_t, absolute Y position (in FB pixels)
* rel_z - int32_t, relative Z motion (wheel)
*/
struct xenkbd_position { struct xenkbd_position {
uint8_t type; /* XENKBD_TYPE_POS */ uint8_t type;
int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_x;
int32_t abs_y; /* absolute Y position (in FB pixels) */ int32_t abs_y;
int32_t rel_z; /* relative Z motion (wheel) */ int32_t rel_z;
};
/*
* Multi-touch event and its sub-types
*
* All multi-touch event packets have common header:
*
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | event_type | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
*
* event_type - uint8_t, multi-touch event sub-type, XENKBD_MT_EV_???
* contact_id - uint8_t, ID of the contact
*
* Touch interactions can consist of one or more contacts.
* For each contact, a series of events is generated, starting
* with a down event, followed by zero or more motion events,
* and ending with an up event. Events relating to the same
* contact point can be identified by the ID of the sequence: contact ID.
* Contact ID may be reused after XENKBD_MT_EV_UP event and
* is in the [0; XENKBD_FIELD_MT_NUM_CONTACTS - 1] range.
*
* For further information please refer to documentation on Wayland [1],
* Linux [2] and Windows [3] multi-touch support.
*
* [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
* [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
* [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
*
*
* Multi-touch down event - sent when a new touch is made: touch is assigned
* a unique contact ID, sent with this and consequent events related
* to this touch.
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | abs_x | 12
* +----------------+----------------+----------------+----------------+
* | abs_y | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* abs_x - int32_t, absolute X position, in pixels
* abs_y - int32_t, absolute Y position, in pixels
*
* Multi-touch contact release event
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* Multi-touch motion event
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | abs_x | 12
* +----------------+----------------+----------------+----------------+
* | abs_y | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* abs_x - int32_t, absolute X position, in pixels
* abs_y - int32_t, absolute Y position, in pixels
*
* Multi-touch input synchronization event - shows end of a set of events
* which logically belong together.
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* Multi-touch shape event - touch point's shape has changed.
* Shape is approximated by an ellipse through the major and minor axis
* lengths: major is the longer diameter of the ellipse and minor is the
* shorter one. Center of the ellipse is reported via
* XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | major | 12
* +----------------+----------------+----------------+----------------+
* | minor | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* major - uint32_t, length of the major axis, pixels
* minor - uint32_t, length of the minor axis, pixels
*
* Multi-touch orientation event - touch point's shape has changed
* its orientation: calculated as a clockwise angle between the major axis
* of the ellipse and positive Y axis in degrees, [-180; +180].
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | orientation | reserved | 12
* +----------------+----------------+----------------+----------------+
* | reserved | 16
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 40
* +----------------+----------------+----------------+----------------+
*
* orientation - int16_t, clockwise angle of the major axis
*/
/*
 * Multi-touch event packet: an 8-octet header (type, event_type, contact_id
 * and 5 reserved octets) followed by a payload union whose active member
 * depends on event_type.
 */
struct xenkbd_mtouch {
uint8_t type; /* XENKBD_TYPE_MTOUCH */
uint8_t event_type; /* XENKBD_MT_EV_??? */
uint8_t contact_id;
uint8_t reserved[5]; /* reserved for the future use */
union {
/* NOTE(review): presumably the XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION payload -- confirm */
struct {
int32_t abs_x; /* absolute X position, pixels */
int32_t abs_y; /* absolute Y position, pixels */
} pos;
/* _MT_EV_SHAPE payload: axes of the ellipse describing the contact */
struct {
uint32_t major; /* length of the major axis, pixels */
uint32_t minor; /* length of the minor axis, pixels */
} shape;
/* _MT_EV_ORIENT payload: angle in degrees, [-180; +180] */
int16_t orientation; /* clockwise angle of the major axis */
} u;
}; };
#define XENKBD_IN_EVENT_SIZE 40 #define XENKBD_IN_EVENT_SIZE 40
@ -72,15 +461,26 @@ union xenkbd_in_event {
struct xenkbd_motion motion; struct xenkbd_motion motion;
struct xenkbd_key key; struct xenkbd_key key;
struct xenkbd_position pos; struct xenkbd_position pos;
struct xenkbd_mtouch mtouch;
char pad[XENKBD_IN_EVENT_SIZE]; char pad[XENKBD_IN_EVENT_SIZE];
}; };
/* Out events (frontend -> backend) */
/* /*
*****************************************************************************
* Frontend to backend events
*****************************************************************************
*
* Out events may be sent only when requested by backend, and receipt * Out events may be sent only when requested by backend, and receipt
* of an unknown out event is an error. * of an unknown out event is an error.
* No out events currently defined. * No out events currently defined.
* All event packets have the same length (40 octets)
* All event packets have common header:
* 0 octet
* +-----------------+
* | type |
* +-----------------+
* type - uint8_t, event code
*/ */
#define XENKBD_OUT_EVENT_SIZE 40 #define XENKBD_OUT_EVENT_SIZE 40
@ -90,7 +490,11 @@ union xenkbd_out_event {
char pad[XENKBD_OUT_EVENT_SIZE]; char pad[XENKBD_OUT_EVENT_SIZE];
}; };
/* shared page */ /*
*****************************************************************************
* Shared page
*****************************************************************************
*/
#define XENKBD_IN_RING_SIZE 2048 #define XENKBD_IN_RING_SIZE 2048
#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
@ -113,4 +517,4 @@ struct xenkbd_page {
uint32_t out_cons, out_prod; uint32_t out_cons, out_prod;
}; };
#endif #endif /* __XEN_PUBLIC_IO_KBDIF_H__ */

View File

@ -283,4 +283,147 @@ struct __name##_back_ring { \
(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
} while (0) } while (0)
/*
* DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
* functions to check if there is data on the ring, and to read and
* write to them.
*
* DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
* does not define the indexes page. As different protocols can have
* extensions to the basic format, this macro allows them to define their
* own struct.
*
* XEN_FLEX_RING_SIZE
* Convenience macro to calculate the size of one of the two rings
* from the overall order.
*
* $NAME_mask
* Function to apply the size mask to an index, reducing the index
* to the range [0, size - 1].
*
* $NAME_read_packet
* Function to read data from the ring. The amount of data to read is
* specified by the "size" argument.
*
* $NAME_write_packet
* Function to write data to the ring. The amount of data to write is
* specified by the "size" argument.
*
* $NAME_get_ring_ptr
* Convenience function that returns a pointer to read/write to the
* ring at the right location.
*
* $NAME_data_intf
* Indexes page, shared between frontend and backend. It also
* contains the array of grant refs.
*
* $NAME_queued
* Function to calculate how many bytes are currently on the ring,
* ready to be read. It can also be used to calculate how much free
* space is currently on the ring (XEN_FLEX_RING_SIZE() -
* $NAME_queued()).
*/
/*
 * Ring protocols and hypercall interfaces always use 4K pages, whatever the
 * architecture or the page granularity picked by the operating system, so
 * fall back to a shift of 12 when XEN_PAGE_SHIFT has not been defined yet.
 */
#if !defined(XEN_PAGE_SHIFT)
#define XEN_PAGE_SHIFT 12
#endif

/*
 * Size in bytes of ONE of the two rings, derived from the overall order of
 * the allocation: each ring takes half of it, hence the "- 1" in the shift.
 */
#define XEN_FLEX_RING_SIZE(order) \
	(1UL << (XEN_PAGE_SHIFT - 1 + (order)))
/*
 * DEFINE_XEN_FLEX_RING(name) - emit the inline helpers name_mask(),
 * name_get_ring_ptr(), name_read_packet(), name_write_packet() and
 * name_queued(), followed by struct name_data. The expansion ends without a
 * semicolon, so the invocation has to supply it.
 */
#define DEFINE_XEN_FLEX_RING(name) \
/* Wrap idx into the ring by AND-ing it with (ring_size - 1). */ \
static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \
{ \
	RING_IDX low_bits = ring_size - 1; \
 \
	return idx & low_bits; \
} \
 \
/* Address of the byte that idx designates inside the ring buffer buf. */ \
static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \
						  RING_IDX idx, \
						  RING_IDX ring_size) \
{ \
	return &buf[name##_mask(idx, ring_size)]; \
} \
 \
/* \
 * Copy size bytes out of the ring, starting at *masked_cons, into opaque. \
 * The copy is split in two when the data wraps past the end of buf. \
 * *masked_cons is then advanced by size and masked again. \
 */ \
static inline void name##_read_packet(void *opaque, \
				       const unsigned char *buf, \
				       size_t size, \
				       RING_IDX masked_prod, \
				       RING_IDX *masked_cons, \
				       RING_IDX ring_size) \
{ \
	/* bytes between the consumer index and the end of the buffer */ \
	RING_IDX to_end = ring_size - *masked_cons; \
 \
	if (*masked_cons >= masked_prod && size > to_end) { \
		memcpy(opaque, buf + *masked_cons, to_end); \
		memcpy((unsigned char *)opaque + to_end, buf, size - to_end); \
	} else { \
		memcpy(opaque, buf + *masked_cons, size); \
	} \
	*masked_cons = name##_mask(*masked_cons + size, ring_size); \
} \
 \
/* \
 * Copy size bytes from opaque into the ring, starting at *masked_prod. \
 * The copy is split in two when the data wraps past the end of buf. \
 * *masked_prod is then advanced by size and masked again. \
 */ \
static inline void name##_write_packet(unsigned char *buf, \
					const void *opaque, \
					size_t size, \
					RING_IDX *masked_prod, \
					RING_IDX masked_cons, \
					RING_IDX ring_size) \
{ \
	/* bytes between the producer index and the end of the buffer */ \
	RING_IDX to_end = ring_size - *masked_prod; \
 \
	if (*masked_prod >= masked_cons && size > to_end) { \
		memcpy(buf + *masked_prod, opaque, to_end); \
		memcpy(buf, (const unsigned char *)opaque + to_end, \
		       size - to_end); \
	} else { \
		memcpy(buf + *masked_prod, opaque, size); \
	} \
	*masked_prod = name##_mask(*masked_prod + size, ring_size); \
} \
 \
/* \
 * Number of bytes queued between cons and prod: 0 when the unmasked \
 * indexes are equal, ring_size when they differ but mask to the same \
 * position, otherwise the masked distance from cons to prod. \
 */ \
static inline RING_IDX name##_queued(RING_IDX prod, \
				      RING_IDX cons, \
				      RING_IDX ring_size) \
{ \
	RING_IDX wrapped_prod, wrapped_cons; \
 \
	if (prod == cons) \
		return 0; \
 \
	wrapped_prod = name##_mask(prod, ring_size); \
	wrapped_cons = name##_mask(cons, ring_size); \
 \
	if (wrapped_prod == wrapped_cons) \
		return ring_size; \
 \
	return wrapped_prod > wrapped_cons ? \
		wrapped_prod - wrapped_cons : \
		ring_size - (wrapped_cons - wrapped_prod); \
} \
 \
/* Pointers to the two data rings. */ \
struct name##_data { \
	unsigned char *in;  /* half of the allocation */ \
	unsigned char *out; /* half of the allocation */ \
}
/*
 * Define the indexes page, struct name##_data_intf, and then expand
 * DEFINE_XEN_FLEX_RING(name) for the ring helpers and struct name##_data.
 *
 * The page holds the consumer/producer index pair of the "in" ring, the
 * pair of the "out" ring, the ring order and a trailing array of grant
 * references.
 * NOTE(review): pad1/pad2 presumably round each index pair up to 64 bytes
 * (assuming a 4-byte RING_IDX) so that the two pairs do not share a cache
 * line -- confirm against the protocol specification.
 */
#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \
struct name##_data_intf { \
RING_IDX in_cons, in_prod; \
\
uint8_t pad1[56]; \
\
RING_IDX out_cons, out_prod; \
\
uint8_t pad2[56]; \
\
RING_IDX ring_order; \
grant_ref_t ref[]; /* grant references, flexible array member */ \
}; \
DEFINE_XEN_FLEX_RING(name)
#endif /* __XEN_PUBLIC_IO_RING_H__ */ #endif /* __XEN_PUBLIC_IO_RING_H__ */

View File

@ -0,0 +1,793 @@
/******************************************************************************
* sndif.h
*
* Unified sound-device I/O interface for Xen guest OSes.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Copyright (C) 2013-2015 GlobalLogic Inc.
* Copyright (C) 2016-2017 EPAM Systems Inc.
*
* Authors: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
* Oleksandr Grytsov <oleksandr_grytsov@epam.com>
* Oleksandr Dmytryshyn <oleksandr.dmytryshyn@globallogic.com>
* Iurii Konovalenko <iurii.konovalenko@globallogic.com>
*/
#ifndef __XEN_PUBLIC_IO_SNDIF_H__
#define __XEN_PUBLIC_IO_SNDIF_H__
#include "ring.h"
#include "../grant_table.h"
/*
******************************************************************************
* Feature and Parameter Negotiation
******************************************************************************
*
* Front->back notifications: when enqueuing a new request, sending a
* notification can be made conditional on xensnd_req (i.e., the generic
* hold-off mechanism provided by the ring macros). Backends must set
* xensnd_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
*
* Back->front notifications: when enqueuing a new response, sending a
* notification can be made conditional on xensnd_resp (i.e., the generic
* hold-off mechanism provided by the ring macros). Frontends must set
* xensnd_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
*
* The two halves of a para-virtual sound card driver utilize nodes within
* XenStore to communicate capabilities and to negotiate operating parameters.
* This section enumerates these nodes which reside in the respective front and
* backend portions of XenStore, following the XenBus convention.
*
* All data in XenStore is stored as strings. Nodes specifying numeric
* values are encoded in decimal. Integer value ranges listed below are
* expressed as fixed sized integer types capable of storing the conversion
* of a properly formatted node string, without loss of information.
*
******************************************************************************
* Example configuration
******************************************************************************
*
* Note: depending on the use-case backend can expose more sound cards and
* PCM devices/streams than the underlying HW physically has by employing
* SW mixers, configuring virtual sound streams, channels etc.
*
* This is an example of backend and frontend configuration:
*
*--------------------------------- Backend -----------------------------------
*
* /local/domain/0/backend/vsnd/1/0/frontend-id = "1"
* /local/domain/0/backend/vsnd/1/0/frontend = "/local/domain/1/device/vsnd/0"
* /local/domain/0/backend/vsnd/1/0/state = "4"
* /local/domain/0/backend/vsnd/1/0/versions = "1,2"
*
*--------------------------------- Frontend ----------------------------------
*
* /local/domain/1/device/vsnd/0/backend-id = "0"
* /local/domain/1/device/vsnd/0/backend = "/local/domain/0/backend/vsnd/1/0"
* /local/domain/1/device/vsnd/0/state = "4"
* /local/domain/1/device/vsnd/0/version = "1"
*
*----------------------------- Card configuration ----------------------------
*
* /local/domain/1/device/vsnd/0/short-name = "Card short name"
* /local/domain/1/device/vsnd/0/long-name = "Card long name"
* /local/domain/1/device/vsnd/0/sample-rates = "8000,32000,44100,48000,96000"
* /local/domain/1/device/vsnd/0/sample-formats = "s8,u8,s16_le,s16_be"
* /local/domain/1/device/vsnd/0/buffer-size = "262144"
*
*------------------------------- PCM device 0 --------------------------------
*
* /local/domain/1/device/vsnd/0/0/name = "General analog"
* /local/domain/1/device/vsnd/0/0/channels-max = "5"
*
*----------------------------- Stream 0, playback ----------------------------
*
* /local/domain/1/device/vsnd/0/0/0/type = "p"
* /local/domain/1/device/vsnd/0/0/0/sample-formats = "s8,u8"
* /local/domain/1/device/vsnd/0/0/0/unique-id = "0"
*
* /local/domain/1/device/vsnd/0/0/0/ring-ref = "386"
* /local/domain/1/device/vsnd/0/0/0/event-channel = "15"
*
*------------------------------ Stream 1, capture ----------------------------
*
* /local/domain/1/device/vsnd/0/0/1/type = "c"
* /local/domain/1/device/vsnd/0/0/1/channels-max = "2"
* /local/domain/1/device/vsnd/0/0/1/unique-id = "1"
*
* /local/domain/1/device/vsnd/0/0/1/ring-ref = "384"
* /local/domain/1/device/vsnd/0/0/1/event-channel = "13"
*
*------------------------------- PCM device 1 --------------------------------
*
* /local/domain/1/device/vsnd/0/1/name = "HDMI-0"
* /local/domain/1/device/vsnd/0/1/sample-rates = "8000,32000,44100"
*
*------------------------------ Stream 0, capture ----------------------------
*
* /local/domain/1/device/vsnd/0/1/0/type = "c"
* /local/domain/1/device/vsnd/0/1/0/unique-id = "2"
*
* /local/domain/1/device/vsnd/0/1/0/ring-ref = "387"
* /local/domain/1/device/vsnd/0/1/0/event-channel = "151"
*
*------------------------------- PCM device 2 --------------------------------
*
* /local/domain/1/device/vsnd/0/2/name = "SPDIF"
*
*----------------------------- Stream 0, playback ----------------------------
*
* /local/domain/1/device/vsnd/0/2/0/type = "p"
* /local/domain/1/device/vsnd/0/2/0/unique-id = "3"
*
* /local/domain/1/device/vsnd/0/2/0/ring-ref = "389"
* /local/domain/1/device/vsnd/0/2/0/event-channel = "152"
*
******************************************************************************
* Backend XenBus Nodes
******************************************************************************
*
*----------------------------- Protocol version ------------------------------
*
* versions
* Values: <string>
*
* List of XENSND_LIST_SEPARATOR separated protocol versions supported
* by the backend. For example "1,2,3".
*
******************************************************************************
* Frontend XenBus Nodes
******************************************************************************
*
*-------------------------------- Addressing ---------------------------------
*
* dom-id
* Values: <uint16_t>
*
* Domain identifier.
*
* dev-id
* Values: <uint16_t>
*
* Device identifier.
*
* pcm-dev-idx
* Values: <uint8_t>
*
* Zero based contiguous index of the PCM device.
*
* stream-idx
* Values: <uint8_t>
*
* Zero based contiguous index of the stream of the PCM device.
*
* The following pattern is used for addressing:
* /local/domain/<dom-id>/device/vsnd/<dev-id>/<pcm-dev-idx>/<stream-idx>/...
*
*----------------------------- Protocol version ------------------------------
*
* version
* Values: <string>
*
* Protocol version, chosen among the ones supported by the backend.
*
*------------------------------- PCM settings --------------------------------
*
* Every virtualized sound frontend has a set of PCM devices and streams, each
* could be individually configured. Part of the PCM configuration can be
* defined at higher level of the hierarchy and be fully or partially re-used
* by the underlying layers. These configuration values are:
* o number of channels (min/max)
* o supported sample rates
* o supported sample formats.
* E.g. one can define these values for the whole card, device or stream.
* Every underlying layer in turn can re-define some or all of them to better
* fit its needs. For example, card may define number of channels to be
* in [1; 8] range, and some particular stream may be limited to [1; 2] only.
* The rule is that the underlying layer must be a subset of the upper layer
* range.
*
* channels-min
* Values: <uint8_t>
*
* The minimum amount of channels that is supported, [1; channels-max].
* Optional, if not set or omitted a value of 1 is used.
*
* channels-max
* Values: <uint8_t>
*
* The maximum amount of channels that is supported.
* Must be at least <channels-min>.
*
* sample-rates
* Values: <list of uint32_t>
*
* List of supported sample rates separated by XENSND_LIST_SEPARATOR.
* Sample rates are expressed as a list of decimal values w/o any
* ordering requirement.
*
* sample-formats
* Values: <list of XENSND_PCM_FORMAT_XXX_STR>
*
* List of supported sample formats separated by XENSND_LIST_SEPARATOR.
* Items must not exceed XENSND_SAMPLE_FORMAT_MAX_LEN length.
*
* buffer-size
* Values: <uint32_t>
*
* The maximum size in octets of the buffer to allocate per stream.
*
*----------------------- Virtual sound card settings -------------------------
* short-name
* Values: <char[32]>
*
* Short name of the virtual sound card. Optional.
*
* long-name
* Values: <char[80]>
*
* Long name of the virtual sound card. Optional.
*
*----------------------------- Device settings -------------------------------
* name
* Values: <char[80]>
*
* Name of the sound device within the virtual sound card. Optional.
*
*----------------------------- Stream settings -------------------------------
*
* type
* Values: "p", "c"
*
* Stream type: "p" - playback stream, "c" - capture stream
*
* If both capture and playback are needed then two streams need to be
* defined under the same device.
*
* unique-id
* Values: <uint32_t>
*
* After stream initialization it is assigned a unique ID (within the front
* driver), so every stream of the frontend can be identified by the
* backend by this ID. This is not equal to stream-idx as the latter is
* zero based within the device, but this index is contiguous within the
* driver.
*
*-------------------- Stream Request Transport Parameters --------------------
*
* event-channel
* Values: <uint32_t>
*
* The identifier of the Xen event channel used to signal activity
* in the ring buffer.
*
* ring-ref
* Values: <uint32_t>
*
* The Xen grant reference granting permission for the backend to map
* a sole page in a single page sized ring buffer.
*
******************************************************************************
* STATE DIAGRAMS
******************************************************************************
*
* Tool stack creates front and back state nodes with initial state
* XenbusStateInitialising.
* Tool stack creates and sets up frontend sound configuration nodes per domain.
*
* Front Back
* ================================= =====================================
* XenbusStateInitialising XenbusStateInitialising
* o Query backend device identification
* data.
* o Open and validate backend device.
* |
* |
* V
* XenbusStateInitWait
*
* o Query frontend configuration
* o Allocate and initialize
* event channels per configured
* playback/capture stream.
* o Publish transport parameters
* that will be in effect during
* this connection.
* |
* |
* V
* XenbusStateInitialised
*
* o Query frontend transport parameters.
* o Connect to the event channels.
* |
* |
* V
* XenbusStateConnected
*
* o Create and initialize OS
* virtual sound device instances
* as per configuration.
* |
* |
* V
* XenbusStateConnected
*
* XenbusStateUnknown
* XenbusStateClosed
* XenbusStateClosing
* o Remove virtual sound device
* o Remove event channels
* |
* |
* V
* XenbusStateClosed
*
*------------------------------- Recovery flow -------------------------------
*
* In case of frontend unrecoverable errors backend handles that as
* if frontend goes into the XenbusStateClosed state.
*
* In case of backend unrecoverable errors frontend tries removing
* the virtualized device. If this is possible at the moment of error,
* then frontend goes into the XenbusStateInitialising state and is ready for
* new connection with backend. If the virtualized device is still in use and
* cannot be removed, then frontend goes into the XenbusStateReconfiguring state
* until either the virtualized device is removed or backend initiates a new
* connection. On the virtualized device removal frontend goes into the
* XenbusStateInitialising state.
*
* Note on XenbusStateReconfiguring state of the frontend: if backend has
* unrecoverable errors then frontend cannot send requests to the backend
* and thus cannot provide functionality of the virtualized device anymore.
* After backend is back to normal the virtualized device may still hold some
* state: configuration in use, allocated buffers, client application state etc.
* So, in most cases, this will require frontend to implement complex recovery
* reconnect logic. Instead, by going into XenbusStateReconfiguring state,
* frontend will make sure no new clients of the virtualized device are
* accepted, allow existing client(s) to exit gracefully by signaling error
* state etc.
* Once all the clients are gone frontend can reinitialize the virtualized
* device and get into XenbusStateInitialising state again signaling the
* backend that a new connection can be made.
*
* There are multiple conditions possible under which frontend will go from
* XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS
* specific. For example:
* 1. The underlying OS framework may provide callbacks to signal that the last
* client of the virtualized device has gone and the device can be removed
* 2. Frontend can schedule a deferred work (timer/tasklet/workqueue)
* to periodically check if this is the right time to re-try removal of
* the virtualized device.
* 3. By any other means.
*
******************************************************************************
* PCM FORMATS
******************************************************************************
*
* XENSND_PCM_FORMAT_<format>[_<endian>]
*
* format: <S/U/F><bits> or <name>
* S - signed, U - unsigned, F - float
* bits - 8, 16, 24, 32
* name - MU_LAW, GSM, etc.
*
* endian: <LE/BE>, may be absent
* LE - Little endian, BE - Big endian
*/
/*
 * Numeric PCM sample format identifiers: the XENSND_PCM_FORMAT_XXX value
 * carried in the pcm_format field of the open request.
 */
/* Integer samples */
#define XENSND_PCM_FORMAT_S8                  0
#define XENSND_PCM_FORMAT_U8                  1
#define XENSND_PCM_FORMAT_S16_LE              2
#define XENSND_PCM_FORMAT_S16_BE              3
#define XENSND_PCM_FORMAT_U16_LE              4
#define XENSND_PCM_FORMAT_U16_BE              5
#define XENSND_PCM_FORMAT_S24_LE              6
#define XENSND_PCM_FORMAT_S24_BE              7
#define XENSND_PCM_FORMAT_U24_LE              8
#define XENSND_PCM_FORMAT_U24_BE              9
#define XENSND_PCM_FORMAT_S32_LE              10
#define XENSND_PCM_FORMAT_S32_BE              11
#define XENSND_PCM_FORMAT_U32_LE              12
#define XENSND_PCM_FORMAT_U32_BE              13
/* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */
#define XENSND_PCM_FORMAT_F32_LE              14
#define XENSND_PCM_FORMAT_F32_BE              15
/* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */
#define XENSND_PCM_FORMAT_F64_LE              16
#define XENSND_PCM_FORMAT_F64_BE              17
/* Named formats */
#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE  18
#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE  19
#define XENSND_PCM_FORMAT_MU_LAW              20
#define XENSND_PCM_FORMAT_A_LAW               21
#define XENSND_PCM_FORMAT_IMA_ADPCM           22
#define XENSND_PCM_FORMAT_MPEG                23
#define XENSND_PCM_FORMAT_GSM                 24
/*
******************************************************************************
* REQUEST CODES
******************************************************************************
*/
/* Operation codes carried in the "operation" field of a request. */
#define XENSND_OP_OPEN        0	/* open a PCM stream for playback or capture */
#define XENSND_OP_CLOSE       1	/* close an opened PCM stream */
#define XENSND_OP_READ        2	/* read, used for capture */
#define XENSND_OP_WRITE       3	/* write, used for playback */
#define XENSND_OP_SET_VOLUME  4	/* set channels' volume of the stream */
#define XENSND_OP_GET_VOLUME  5	/* get channels' volume of the stream */
#define XENSND_OP_MUTE        6	/* mute the stream */
#define XENSND_OP_UNMUTE      7	/* unmute the stream */
/*
******************************************************************************
* XENSTORE FIELD AND PATH NAME STRINGS, HELPERS
******************************************************************************
*/
/* XenStore driver node name and the separator used in list-valued nodes. */
#define XENSND_DRIVER_NAME     "vsnd"
#define XENSND_LIST_SEPARATOR  ","

/* XenStore node (field) names */
#define XENSND_FIELD_BE_VERSIONS       "versions"
#define XENSND_FIELD_FE_VERSION        "version"
#define XENSND_FIELD_VCARD_SHORT_NAME  "short-name"
#define XENSND_FIELD_VCARD_LONG_NAME   "long-name"
#define XENSND_FIELD_RING_REF          "ring-ref"
#define XENSND_FIELD_EVT_CHNL          "event-channel"
#define XENSND_FIELD_DEVICE_NAME       "name"
#define XENSND_FIELD_TYPE              "type"
#define XENSND_FIELD_STREAM_UNIQUE_ID  "unique-id"
#define XENSND_FIELD_CHANNELS_MIN      "channels-min"
#define XENSND_FIELD_CHANNELS_MAX      "channels-max"
#define XENSND_FIELD_SAMPLE_RATES      "sample-rates"
#define XENSND_FIELD_SAMPLE_FORMATS    "sample-formats"
#define XENSND_FIELD_BUFFER_SIZE       "buffer-size"

/* Values of the stream "type" node */
#define XENSND_STREAM_TYPE_PLAYBACK  "p"
#define XENSND_STREAM_TYPE_CAPTURE   "c"

/* Maximum string length of a single sample rate item */
#define XENSND_SAMPLE_RATE_MAX_LEN    11

/* Maximum length of a single sample format item, and the item strings */
#define XENSND_SAMPLE_FORMAT_MAX_LEN  24

#define XENSND_PCM_FORMAT_S8_STR                  "s8"
#define XENSND_PCM_FORMAT_U8_STR                  "u8"
#define XENSND_PCM_FORMAT_S16_LE_STR              "s16_le"
#define XENSND_PCM_FORMAT_S16_BE_STR              "s16_be"
#define XENSND_PCM_FORMAT_U16_LE_STR              "u16_le"
#define XENSND_PCM_FORMAT_U16_BE_STR              "u16_be"
#define XENSND_PCM_FORMAT_S24_LE_STR              "s24_le"
#define XENSND_PCM_FORMAT_S24_BE_STR              "s24_be"
#define XENSND_PCM_FORMAT_U24_LE_STR              "u24_le"
#define XENSND_PCM_FORMAT_U24_BE_STR              "u24_be"
#define XENSND_PCM_FORMAT_S32_LE_STR              "s32_le"
#define XENSND_PCM_FORMAT_S32_BE_STR              "s32_be"
#define XENSND_PCM_FORMAT_U32_LE_STR              "u32_le"
#define XENSND_PCM_FORMAT_U32_BE_STR              "u32_be"
#define XENSND_PCM_FORMAT_F32_LE_STR              "float_le"
#define XENSND_PCM_FORMAT_F32_BE_STR              "float_be"
#define XENSND_PCM_FORMAT_F64_LE_STR              "float64_le"
#define XENSND_PCM_FORMAT_F64_BE_STR              "float64_be"
#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE_STR  "iec958_subframe_le"
#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE_STR  "iec958_subframe_be"
#define XENSND_PCM_FORMAT_MU_LAW_STR              "mu_law"
#define XENSND_PCM_FORMAT_A_LAW_STR               "a_law"
#define XENSND_PCM_FORMAT_IMA_ADPCM_STR           "ima_adpcm"
#define XENSND_PCM_FORMAT_MPEG_STR                "mpeg"
#define XENSND_PCM_FORMAT_GSM_STR                 "gsm"
/*
******************************************************************************
* STATUS RETURN CODES
******************************************************************************
*
* Status return code is zero on success and -XEN_EXX on failure.
*
******************************************************************************
* Assumptions
******************************************************************************
* o usage of grant reference 0 as invalid grant reference:
* grant reference 0 is valid, but never exposed to a PV driver,
* because of the fact it is already in use/reserved by the PV console.
* o all references in this document to page sizes must be treated
* as pages of size XEN_PAGE_SIZE unless otherwise noted.
*
******************************************************************************
* Description of the protocol between frontend and backend driver
******************************************************************************
*
* The two halves of a Para-virtual sound driver communicate with
* each other using shared pages and event channels.
* Shared page contains a ring with request/response packets.
*
* Packets, used for input/output operations, e.g. read/write, set/get volume,
* etc., provide offset/length fields in order to allow asynchronous protocol
* operation with buffer space sharing: part of the buffer allocated at
* XENSND_OP_OPEN can be used for audio samples and part, for example,
* for volume control.
*
* All reserved fields in the structures below must be 0.
*
*---------------------------------- Requests ---------------------------------
*
* All request packets have the same length (32 octets)
* All request packets have common header:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* id - uint16_t, private guest value, echoed in response
* operation - uint8_t, operation code, XENSND_OP_???
*
* For all packets which use offset and length:
* offset - uint32_t, read or write data offset within the shared buffer,
* passed with XENSND_OP_OPEN request, octets,
* [0; XENSND_OP_OPEN.buffer_sz - 1].
* length - uint32_t, read or write data length, octets
*
* Request open - open a PCM stream for playback or capture:
*
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | XENSND_OP_OPEN | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | pcm_rate | 12
* +----------------+----------------+----------------+----------------+
* | pcm_format | pcm_channels | reserved | 16
* +----------------+----------------+----------------+----------------+
* | buffer_sz | 20
* +----------------+----------------+----------------+----------------+
* | gref_directory | 24
* +----------------+----------------+----------------+----------------+
* | reserved | 28
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* pcm_rate - uint32_t, stream data rate, Hz
* pcm_format - uint8_t, XENSND_PCM_FORMAT_XXX value
* pcm_channels - uint8_t, number of channels of this stream,
* [channels-min; channels-max]
* buffer_sz - uint32_t, buffer size to be allocated, octets
* gref_directory - grant_ref_t, a reference to the first shared page
* describing shared buffer references. At least one page exists. If shared
* buffer size (buffer_sz) exceeds what can be addressed by this single page,
* then reference to the next page must be supplied (see gref_dir_next_page
* below)
*/
/* Operation-specific part of an XENSND_OP_OPEN request. */
struct xensnd_open_req {
uint32_t pcm_rate; /* stream data rate, Hz */
uint8_t pcm_format; /* XENSND_PCM_FORMAT_XXX value */
uint8_t pcm_channels; /* number of channels, [channels-min; channels-max] */
uint16_t reserved; /* must be 0 */
uint32_t buffer_sz; /* buffer size to be allocated, octets */
grant_ref_t gref_directory; /* first page of the page directory describing the shared buffer */
};
/*
* Shared page for XENSND_OP_OPEN buffer descriptor (gref_directory in the
* request) employs a list of pages, describing all pages of the shared data
* buffer:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | gref_dir_next_page | 4
* +----------------+----------------+----------------+----------------+
* | gref[0] | 8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | gref[i] | i*4+8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | gref[N - 1] | N*4+4
* +----------------+----------------+----------------+----------------+
*
* gref_dir_next_page - grant_ref_t, reference to the next page describing
* page directory. Must be 0 if there are no more pages in the list.
* gref[i] - grant_ref_t, reference to a shared page of the buffer
* allocated at XENSND_OP_OPEN
*
* Number of grant_ref_t entries in the whole page directory is not
* passed, but instead can be calculated as:
* num_grefs_total = (XENSND_OP_OPEN.buffer_sz + XEN_PAGE_SIZE - 1) /
* XEN_PAGE_SIZE
*/
/*
 * One page of the page directory referenced by XENSND_OP_OPEN's
 * gref_directory: a link to the next directory page followed by grant
 * references of the shared buffer pages.
 */
struct xensnd_page_directory {
grant_ref_t gref_dir_next_page; /* next directory page, 0 if there are no more pages */
grant_ref_t gref[1]; /* Variable length: declared with one entry, more follow in the page */
};
/*
* Request close - close an opened pcm stream:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | XENSND_OP_CLOSE| reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* Request read/write - used for read (for capture) or write (for playback):
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | offset | 12
* +----------------+----------------+----------------+----------------+
* | length | 16
* +----------------+----------------+----------------+----------------+
* | reserved | 20
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* operation - XENSND_OP_READ for read or XENSND_OP_WRITE for write
*/
/*
 * Operation-specific part of a request that carries an offset/length pair.
 * Per the N.B. further down it is shared by the read/write, set/get volume
 * and mute/unmute operations.
 */
struct xensnd_rw_req {
uint32_t offset;
uint32_t length;
};
/*
* Request set/get volume - set/get channels' volume of the stream given:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | offset | 12
* +----------------+----------------+----------------+----------------+
* | length | 16
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* operation - XENSND_OP_SET_VOLUME for volume set
* or XENSND_OP_GET_VOLUME for volume get
* Buffer passed with XENSND_OP_OPEN is used to exchange volume
* values:
*
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | channel[0] | 4
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | channel[i] | i*4
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | channel[N - 1] | (N-1)*4
* +----------------+----------------+----------------+----------------+
*
* N = XENSND_OP_OPEN.pcm_channels
* i - uint8_t, index of a channel
* channel[i] - int32_t, volume of i-th channel
* Volume is expressed as a signed value in steps of 0.001 dB,
* with 0 being 0 dB.
*
* Request mute/unmute - mute/unmute stream:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | reserved | 8
* +----------------+----------------+----------------+----------------+
* | offset | 12
* +----------------+----------------+----------------+----------------+
* | length | 16
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* operation - XENSND_OP_MUTE for mute or XENSND_OP_UNMUTE for unmute
* Buffer passed with XENSND_OP_OPEN is used to exchange mute/unmute
* values:
*
* 0 octet
* +----------------+----------------+----------------+----------------+
* | channel[0] | 4
* +----------------+----------------+----------------+----------------+
* +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | channel[i] | i*4
* +----------------+----------------+----------------+----------------+
* +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | channel[N - 1] | (N-1)*4
* +----------------+----------------+----------------+----------------+
*
* N = XENSND_OP_OPEN.pcm_channels
* i - uint8_t, index of a channel
* channel[i] - uint8_t, non-zero if i-th channel needs to be muted/unmuted
*
*------------------------------------ N.B. -----------------------------------
*
* The 'struct xensnd_rw_req' is also used for XENSND_OP_SET_VOLUME,
* XENSND_OP_GET_VOLUME, XENSND_OP_MUTE, XENSND_OP_UNMUTE.
*/
/*
*---------------------------------- Responses --------------------------------
*
* All response packets have the same length (32 octets)
*
* Response for all requests:
* 0 1 2 3 octet
* +----------------+----------------+----------------+----------------+
* | id | operation | reserved | 4
* +----------------+----------------+----------------+----------------+
* | status | 8
* +----------------+----------------+----------------+----------------+
* | reserved | 12
* +----------------+----------------+----------------+----------------+
* |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
* +----------------+----------------+----------------+----------------+
* | reserved | 32
* +----------------+----------------+----------------+----------------+
*
* id - uint16_t, copied from the request
* operation - uint8_t, XENSND_OP_* - copied from request
* status - int32_t, response status, zero on success and -XEN_EXX on failure
*/
/*
 * Request packet: an 8-octet common header (id, operation, padding)
 * followed by a 24-octet operation-specific area.
 */
struct xensnd_req {
uint16_t id;
/* One of the XENSND_OP_* values. */
uint8_t operation;
/* Pads the header (2 + 1 + 5) to 8 octets. */
uint8_t reserved[5];
union {
struct xensnd_open_req open;
struct xensnd_rw_req rw;
/* Fixes the size of the operation-specific area at 24 octets. */
uint8_t reserved[24];
} op;
};
/*
 * Response packet, 32 octets: id and operation are copied from the request,
 * status is zero on success and a negative error value on failure.
 */
struct xensnd_resp {
uint16_t id;
uint8_t operation;
uint8_t reserved;
int32_t status;
/* Pads the response to 32 octets. */
uint8_t reserved1[24];
};
/*
 * Instantiate the ring types named xen_sndif for the request/response pair
 * (presumably via the generic Xen ring macros - confirm against ring.h).
 */
DEFINE_RING_TYPES(xen_sndif, struct xensnd_req, struct xensnd_resp);
#endif /* __XEN_PUBLIC_IO_SNDIF_H__ */

View File

@ -22,6 +22,8 @@ void xen_timer_resume(void);
void xen_arch_resume(void); void xen_arch_resume(void);
void xen_arch_suspend(void); void xen_arch_suspend(void);
void xen_reboot(int reason);
void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_register(struct notifier_block *nb);
void xen_resume_notifier_unregister(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb);
@ -34,11 +36,25 @@ u64 xen_steal_clock(int cpu);
int xen_setup_shutdown_event(void); int xen_setup_shutdown_event(void);
extern unsigned long *xen_contiguous_bitmap; extern unsigned long *xen_contiguous_bitmap;
#ifdef CONFIG_XEN_PV
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits, unsigned int address_bits,
dma_addr_t *dma_handle); dma_addr_t *dma_handle);
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
#else
/*
 * Stub used when CONFIG_XEN_PV is not set: does no work and returns 0.
 * *dma_handle is left untouched.
 */
static inline int xen_create_contiguous_region(phys_addr_t pstart,
unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle)
{
return 0;
}
/* Stub used when CONFIG_XEN_PV is not set: intentionally empty. */
static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
unsigned int order) { }
#endif
struct vm_area_struct; struct vm_area_struct;
@ -120,6 +136,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
unsigned long count, u64 *max_size, unsigned long count, u64 *max_size,
int *reset_type); int *reset_type);
void xen_efi_reset_system(int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data);
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT

View File

@ -22,6 +22,15 @@ config NET_9P_VIRTIO
This builds support for a transports between This builds support for a transports between
guest partitions and a host partition. guest partitions and a host partition.
config NET_9P_XEN
depends on XEN
select XEN_XENBUS_FRONTEND
tristate "9P Xen Transport"
help
This builds support for a transport for 9pfs between
two Xen domains.
config NET_9P_RDMA config NET_9P_RDMA
depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
tristate "9P RDMA Transport (Experimental)" tristate "9P RDMA Transport (Experimental)"

View File

@ -1,4 +1,5 @@
obj-$(CONFIG_NET_9P) := 9pnet.o obj-$(CONFIG_NET_9P) := 9pnet.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
@ -14,5 +15,8 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
9pnet_virtio-objs := \ 9pnet_virtio-objs := \
trans_virtio.o \ trans_virtio.o \
9pnet_xen-objs := \
trans_xen.o \
9pnet_rdma-objs := \ 9pnet_rdma-objs := \
trans_rdma.o \ trans_rdma.o \

545
net/9p/trans_xen.c Normal file
View File

@ -0,0 +1,545 @@
/*
* net/9p/trans_xen.c
*
* Xen transport layer.
*
* Copyright (C) 2017 by Stefano Stabellini <stefano@aporeto.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/interface/io/9pfs.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/rwlock.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
/* Number of data rings set up for every 9pfs share. */
#define XEN_9PFS_NUM_RINGS 2
/* log2 of the number of Xen pages granted to the backend for one ring. */
#define XEN_9PFS_RING_ORDER 6
/* Size used for each of the in and out areas; the out area starts this far into the allocation. */
#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER)
/*
 * Packed header found at the start of every packet read from the in ring.
 * size is the length of the whole packet including this header; tag is used
 * to look up the matching request.
 */
struct xen_9pfs_header {
uint32_t size;
uint8_t id;
uint16_t tag;
/* uint8_t sdata[]; */
} __attribute__((packed));
/* One per ring, more than one per 9pfs share */
struct xen_9pfs_dataring {
/* back-pointer to the share this ring belongs to */
struct xen_9pfs_front_priv *priv;
/* shared interface page: ring indexes plus the grant refs of the data pages */
struct xen_9pfs_data_intf *intf;
/* grant reference of the interface page */
grant_ref_t ref;
/* event channel allocated for this ring */
int evtchn;
/* irq bound to evtchn */
int irq;
/* protect a ring from concurrent accesses */
spinlock_t lock;
/* in and out data areas inside the granted ring pages */
struct xen_9pfs_data data;
/* senders wait here until the out ring has room for their request */
wait_queue_head_t wq;
/* deferred response processing, runs p9_xen_response() */
struct work_struct work;
};
/* One per 9pfs share */
struct xen_9pfs_front_priv {
/* link in the global xen_9pfs_devs list */
struct list_head list;
/* xenbus device this instance was probed for */
struct xenbus_device *dev;
/* "tag" read from xenstore; compared with the address given to p9_xen_create() */
char *tag;
/* 9p client currently bound to this share, NULL when none */
struct p9_client *client;
/* number of entries in rings */
int num_rings;
struct xen_9pfs_dataring *rings;
};
/* All probed 9pfs front-end instances; walked and modified under xen_9pfs_lock. */
static LIST_HEAD(xen_9pfs_devs);
static DEFINE_RWLOCK(xen_9pfs_lock);
/*
 * We don't currently allow canceling of requests.
 *
 * Always returns 1 (presumably "not cancelled" in the p9_trans_module
 * ->cancel contract - confirm against net/9p/client.c).
 */
static int p9_xen_cancel(struct p9_client *client, struct p9_req_t *req)
{
return 1;
}
/*
 * Transport create hook: bind @client to the 9pfs share whose xenstore tag
 * equals @addr.
 *
 * Returns 0 when a matching share was found, -EINVAL when @addr is NULL or
 * no probed share carries that tag. @args is unused.
 */
static int p9_xen_create(struct p9_client *client, const char *addr, char *args)
{
	struct xen_9pfs_front_priv *priv;

	/* strcmp() below must never be handed a NULL address */
	if (!addr)
		return -EINVAL;

	read_lock(&xen_9pfs_lock);
	list_for_each_entry(priv, &xen_9pfs_devs, list) {
		if (!strcmp(priv->tag, addr)) {
			priv->client = client;
			read_unlock(&xen_9pfs_lock);
			return 0;
		}
	}
	read_unlock(&xen_9pfs_lock);
	return -EINVAL;
}
/*
 * Transport close hook: detach @client from the share it is bound to.
 * Does nothing when no share references @client.
 */
static void p9_xen_close(struct p9_client *client)
{
	struct xen_9pfs_front_priv *share;

	read_lock(&xen_9pfs_lock);
	list_for_each_entry(share, &xen_9pfs_devs, list) {
		if (share->client != client)
			continue;
		/* only the first matching share is cleared */
		share->client = NULL;
		break;
	}
	read_unlock(&xen_9pfs_lock);
}
/*
 * Wait condition for senders: true once the out ring has at least @size
 * bytes that are not occupied by queued data.
 */
static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
{
	RING_IDX consumer, producer;

	/* snapshot both indexes before the barrier, consumer first */
	consumer = ring->intf->out_cons;
	producer = ring->intf->out_prod;
	virt_mb();

	return !(XEN_9PFS_RING_SIZE -
		 xen_9pfs_queued(producer, consumer, XEN_9PFS_RING_SIZE) < size);
}
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
{
struct xen_9pfs_front_priv *priv = NULL;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
u32 size = p9_req->tc->size;
struct xen_9pfs_dataring *ring;
int num;
read_lock(&xen_9pfs_lock);
list_for_each_entry(priv, &xen_9pfs_devs, list) {
if (priv->client == client)
break;
}
read_unlock(&xen_9pfs_lock);
if (!priv || priv->client != client)
return -EINVAL;
num = p9_req->tc->tag % priv->num_rings;
ring = &priv->rings[num];
again:
while (wait_event_interruptible(ring->wq,
p9_xen_write_todo(ring, size)) != 0)
;
spin_lock_irqsave(&ring->lock, flags);
cons = ring->intf->out_cons;
prod = ring->intf->out_prod;
virt_mb();
if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons,
XEN_9PFS_RING_SIZE) < size) {
spin_unlock_irqrestore(&ring->lock, flags);
goto again;
}
masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
&masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
p9_req->status = REQ_STATUS_SENT;
virt_wmb(); /* write ring before updating pointer */
prod += size;
ring->intf->out_prod = prod;
spin_unlock_irqrestore(&ring->lock, flags);
notify_remote_via_irq(ring->irq);
return 0;
}
/*
 * Work handler: drain the in ring of one data ring.
 *
 * For every complete header found on the ring the matching request is
 * looked up by tag, the packet is copied into its reply buffer and the 9p
 * client is notified. Packets with an unknown tag are skipped. Packets
 * that do not fit the reply buffer are not copied and the request is
 * completed with REQ_STATUS_ERROR. The function returns, after notifying
 * the backend, once fewer than a header's worth of bytes are queued.
 */
static void p9_xen_response(struct work_struct *work)
{
	struct xen_9pfs_front_priv *priv;
	struct xen_9pfs_dataring *ring;
	RING_IDX cons, prod, masked_cons, masked_prod;
	struct xen_9pfs_header h;
	struct p9_req_t *req;
	int status;

	ring = container_of(work, struct xen_9pfs_dataring, work);
	priv = ring->priv;

	while (1) {
		cons = ring->intf->in_cons;
		prod = ring->intf->in_prod;
		virt_rmb();

		if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) <
		    sizeof(h)) {
			notify_remote_via_irq(ring->irq);
			return;
		}

		masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
		masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);

		/* First, read just the header */
		xen_9pfs_read_packet(&h, ring->data.in, sizeof(h),
				     masked_prod, &masked_cons,
				     XEN_9PFS_RING_SIZE);

		req = p9_tag_lookup(priv->client, h.tag);
		if (!req || req->status != REQ_STATUS_SENT) {
			dev_warn(&priv->dev->dev, "Wrong req tag=%x\n", h.tag);
			cons += h.size;
			virt_mb();
			ring->intf->in_cons = cons;
			continue;
		}

		if (h.size > req->rc->capacity) {
			/*
			 * The size comes from the other domain; never copy
			 * more than the reply buffer can hold.
			 */
			dev_warn(&priv->dev->dev,
				 "packet size %u too big for tag %u with capacity %zu\n",
				 h.size, h.tag, req->rc->capacity);
			req->status = REQ_STATUS_ERROR;
		} else {
			/*
			 * Copy field by field: the wire header is packed while
			 * the reply descriptor is an ordinary struct, so a raw
			 * memcpy() would not place tag correctly.
			 */
			req->rc->size = h.size;
			req->rc->id = h.id;
			req->rc->tag = h.tag;
			req->rc->offset = 0;

			masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
			/* Then, read the whole packet (including the header) */
			xen_9pfs_read_packet(req->rc->sdata, ring->data.in,
					     h.size, masked_prod, &masked_cons,
					     XEN_9PFS_RING_SIZE);
		}

		/* consume the packet in either case */
		virt_mb();
		cons += h.size;
		ring->intf->in_cons = cons;

		status = (req->status != REQ_STATUS_ERROR) ?
			REQ_STATUS_RCVD : REQ_STATUS_ERROR;

		p9_client_cb(priv->client, req, status);
	}
}
/*
 * Event-channel interrupt handler for one data ring (@r is the ring).
 * Wakes senders waiting for ring space and schedules response processing.
 * Interrupts arriving while no client is bound are ignored. Always returns
 * IRQ_HANDLED.
 */
static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r)
{
	struct xen_9pfs_dataring *ring = r;

	/* anything else is a spurious interrupt and is dropped */
	if (ring && ring->priv->client) {
		wake_up_interruptible(&ring->wq);
		schedule_work(&ring->work);
	}

	return IRQ_HANDLED;
}
/* 9p transport description registered by this module under the name "xen". */
static struct p9_trans_module p9_xen_trans = {
	.name = "xen",
	/*
	 * The 2^XEN_9PFS_RING_ORDER granted pages hold both the in and the
	 * out area, so one message can use at most half of the allocation
	 * (assuming XEN_FLEX_RING_SIZE() is half of it - confirm against the
	 * Xen ring header). Advertising the full allocation would let the
	 * client build requests that can never be queued.
	 */
	.maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 1),
	.def = 1,
	.create = p9_xen_create,
	.close = p9_xen_close,
	.request = p9_xen_request,
	.cancel = p9_xen_cancel,
	.owner = THIS_MODULE,
};
/*
 * xenbus device types handled by this driver; the empty entry presumably
 * terminates the table.
 */
static const struct xenbus_device_id xen_9pfs_front_ids[] = {
{ "9pfs" },
{ "" }
};
static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
{
int i, j;
write_lock(&xen_9pfs_lock);
list_del(&priv->list);
write_unlock(&xen_9pfs_lock);
for (i = 0; i < priv->num_rings; i++) {
if (!priv->rings[i].intf)
break;
if (priv->rings[i].irq > 0)
unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
if (priv->rings[i].data.in) {
for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) {
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
free_pages((unsigned long)priv->rings[i].data.in,
XEN_9PFS_RING_ORDER -
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
free_page((unsigned long)priv->rings[i].intf);
}
kfree(priv->rings);
kfree(priv->tag);
kfree(priv);
}
/*
 * xenbus remove callback: clear the driver data of @dev and release the
 * front-end instance that was stored there. Always returns 0.
 */
static int xen_9pfs_front_remove(struct xenbus_device *dev)
{
	struct xen_9pfs_front_priv *instance;

	instance = dev_get_drvdata(&dev->dev);
	dev_set_drvdata(&dev->dev, NULL);

	xen_9pfs_front_free(instance);

	return 0;
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
struct xen_9pfs_dataring *ring)
{
int i = 0;
int ret = -ENOMEM;
void *bytes = NULL;
init_waitqueue_head(&ring->wq);
spin_lock_init(&ring->lock);
INIT_WORK(&ring->work, p9_xen_response);
ring->intf = (struct xen_9pfs_data_intf *)get_zeroed_page(GFP_KERNEL);
if (!ring->intf)
return ret;
ret = gnttab_grant_foreign_access(dev->otherend_id,
virt_to_gfn(ring->intf), 0);
if (ret < 0)
goto out;
ring->ref = ret;
bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT));
if (!bytes) {
ret = -ENOMEM;
goto out;
}
for (; i < (1 << XEN_9PFS_RING_ORDER); i++) {
ret = gnttab_grant_foreign_access(
dev->otherend_id, virt_to_gfn(bytes) + i, 0);
if (ret < 0)
goto out;
ring->intf->ref[i] = ret;
}
ring->intf->ring_order = XEN_9PFS_RING_ORDER;
ring->data.in = bytes;
ring->data.out = bytes + XEN_9PFS_RING_SIZE;
ret = xenbus_alloc_evtchn(dev, &ring->evtchn);
if (ret)
goto out;
ring->irq = bind_evtchn_to_irqhandler(ring->evtchn,
xen_9pfs_front_event_handler,
0, "xen_9pfs-frontend", ring);
if (ring->irq >= 0)
return 0;
xenbus_free_evtchn(dev, ring->evtchn);
ret = ring->irq;
out:
if (bytes) {
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
free_pages((unsigned long)bytes,
XEN_9PFS_RING_ORDER -
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
free_page((unsigned long)ring->intf);
return ret;
}
/*
 * xenbus probe callback for a "9pfs" device.
 *
 * Checks that the backend offers protocol version "1", at least
 * XEN_9PFS_NUM_RINGS rings and a ring page order of at least
 * XEN_9PFS_RING_ORDER. It then allocates the front-end instance with its
 * rings and publishes version, ring count, ring references and event
 * channels to xenstore in one transaction, which is retried on -EAGAIN.
 * Finally it reads the share tag, adds the instance to xen_9pfs_devs and
 * switches the device to XenbusStateInitialised.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * allocated here is released again.
 */
static int xen_9pfs_front_probe(struct xenbus_device *dev,
				const struct xenbus_device_id *id)
{
	int ret, i;
	struct xenbus_transaction xbt;
	struct xen_9pfs_front_priv *priv = NULL;
	char *versions;
	unsigned int max_rings, max_ring_order, len = 0;

	/* xenbus_read() reports failure through an error pointer */
	versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
	if (IS_ERR(versions))
		return PTR_ERR(versions);
	if (strcmp(versions, "1")) {
		kfree(versions);
		return -EINVAL;
	}
	kfree(versions);
	max_rings = xenbus_read_unsigned(dev->otherend, "max-rings", 0);
	if (max_rings < XEN_9PFS_NUM_RINGS)
		return -EINVAL;
	max_ring_order = xenbus_read_unsigned(dev->otherend,
					      "max-ring-page-order", 0);
	if (max_ring_order < XEN_9PFS_RING_ORDER)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/*
	 * The error path unlinks priv via xen_9pfs_front_free() even though
	 * it has not been added to xen_9pfs_devs yet; a self-linked node
	 * makes that list_del() harmless.
	 */
	INIT_LIST_HEAD(&priv->list);
	priv->dev = dev;
	priv->num_rings = XEN_9PFS_NUM_RINGS;
	priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
			      GFP_KERNEL);
	if (!priv->rings) {
		kfree(priv);
		return -ENOMEM;
	}

	for (i = 0; i < priv->num_rings; i++) {
		priv->rings[i].priv = priv;
		ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]);
		if (ret < 0)
			goto error;
	}

again:
	/* a retried transaction reads the tag again; drop the old copy */
	kfree(priv->tag);
	priv->tag = NULL;

	ret = xenbus_transaction_start(&xbt);
	if (ret) {
		xenbus_dev_fatal(dev, ret, "starting transaction");
		goto error;
	}
	ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1);
	if (ret)
		goto error_xenbus;
	ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u",
			    priv->num_rings);
	if (ret)
		goto error_xenbus;
	for (i = 0; i < priv->num_rings; i++) {
		char str[16];

		/* the key buffer only has room for a single-digit index */
		BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9);
		sprintf(str, "ring-ref%u", i);
		ret = xenbus_printf(xbt, dev->nodename, str, "%d",
				    priv->rings[i].ref);
		if (ret)
			goto error_xenbus;

		sprintf(str, "event-channel-%u", i);
		ret = xenbus_printf(xbt, dev->nodename, str, "%u",
				    priv->rings[i].evtchn);
		if (ret)
			goto error_xenbus;
	}
	priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL);
	if (IS_ERR(priv->tag)) {
		ret = PTR_ERR(priv->tag);
		/* never hand an error pointer to kfree() in the cleanup */
		priv->tag = NULL;
		goto error_xenbus;
	}
	ret = xenbus_transaction_end(xbt, 0);
	if (ret) {
		if (ret == -EAGAIN)
			goto again;
		xenbus_dev_fatal(dev, ret, "completing transaction");
		goto error;
	}

	write_lock(&xen_9pfs_lock);
	list_add_tail(&priv->list, &xen_9pfs_devs);
	write_unlock(&xen_9pfs_lock);
	dev_set_drvdata(&dev->dev, priv);
	xenbus_switch_state(dev, XenbusStateInitialised);

	return 0;

error_xenbus:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(dev, ret, "writing xenstore");
error:
	dev_set_drvdata(&dev->dev, NULL);
	xen_9pfs_front_free(priv);
	return ret;
}
/*
 * xenbus resume callback. Suspend/resume is not implemented: the function
 * only logs a warning and returns 0.
 */
static int xen_9pfs_front_resume(struct xenbus_device *dev)
{
	dev_warn(&dev->dev, "suspend/resume unsupported\n");
	return 0;
}
/*
 * xenbus otherend_changed callback: follow the backend into the Connected
 * state and close the front-end when the backend is closing or has closed.
 * Every other backend state is ignored.
 */
static void xen_9pfs_front_changed(struct xenbus_device *dev,
				   enum xenbus_state backend_state)
{
	switch (backend_state) {
	case XenbusStateConnected:
		xenbus_switch_state(dev, XenbusStateConnected);
		return;

	case XenbusStateClosed:
		/* already closed on our side: nothing left to do */
		if (dev->state == XenbusStateClosed)
			return;
		/* otherwise we missed the backend's CLOSING state */
		xenbus_frontend_closed(dev);
		return;

	case XenbusStateClosing:
		xenbus_frontend_closed(dev);
		return;

	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
	case XenbusStateReconfiguring:
	case XenbusStateReconfigured:
		return;
	}
}
/* xenbus front-end driver description tying the callbacks in this file to the "9pfs" device type. */
static struct xenbus_driver xen_9pfs_front_driver = {
.ids = xen_9pfs_front_ids,
.probe = xen_9pfs_front_probe,
.remove = xen_9pfs_front_remove,
.resume = xen_9pfs_front_resume,
.otherend_changed = xen_9pfs_front_changed,
};
/*
 * Module init: register the "xen" 9p transport and the xenbus front-end
 * driver.
 *
 * Returns -ENODEV when not running in a Xen domain, otherwise the result of
 * xenbus_register_frontend(). If that registration fails the transport is
 * unregistered again so that no reference to this module is left behind.
 */
int p9_trans_xen_init(void)
{
	int rc;

	if (!xen_domain())
		return -ENODEV;

	pr_info("Initialising Xen transport for 9pfs\n");

	v9fs_register_trans(&p9_xen_trans);
	rc = xenbus_register_frontend(&xen_9pfs_front_driver);
	if (rc)
		v9fs_unregister_trans(&p9_xen_trans);

	return rc;
}
module_init(p9_trans_xen_init);
/*
 * Module exit: unregister the "xen" 9p transport, then the xenbus
 * front-end driver.
 */
void p9_trans_xen_exit(void)
{
	v9fs_unregister_trans(&p9_xen_trans);
	/* a void function must not return an expression */
	xenbus_unregister_driver(&xen_9pfs_front_driver);
}
module_exit(p9_trans_xen_exit);