xen: branch for v6.12-rc1a

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRTLbB6QfY48x44uB6AXGG7T9hjvgUCZvZ8dgAKCRCAXGG7T9hj
 vhirAQCR1LAU+czZlqmx6jmKRPTGff1ss66vh04XbtgTjH+8PQEA8O5KvD/KnnxY
 AnrOvrx6fTLwR6iTN7ANVvPO3kGK/w0=
 =0Tol
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:
 "A second round of Xen related changes and features:

   - a small fix of the xen-pciback driver for a warning issued by
     sparse

   - support PCI passthrough when using a PVH dom0

   - enable loading the kernel in PVH mode at arbitrary addresses,
     avoiding conflicts with the memory map when running as a Xen dom0
     using the host memory layout"

* tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  x86/pvh: Add 64bit relocation page tables
  x86/kernel: Move page table macros to header
  x86/pvh: Set phys_base when calling xen_prepare_pvh()
  x86/pvh: Make PVH entrypoint PIC for x86-64
  xen: sync elfnote.h from xen tree
  xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t
  xen/privcmd: Add new syscall to get gsi from dev
  xen/pvh: Setup gsi for passthrough device
  xen/pci: Add a function to reset device for xen
This commit is contained in:
Linus Torvalds 2024-09-27 09:55:30 -07:00
commit 653608c67a
17 changed files with 509 additions and 47 deletions

View File

@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h> #include <asm/pgtable-invert.h>
#endif /* !__ASSEMBLY__ */ #else /* __ASSEMBLY__ */
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
#define SYM_DATA_START_PAGE_ALIGNED(name) \
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
.rept (COUNT) ; \
.quad (START) + (i << PMD_SHIFT) + (PERM) ; \
i = i + 1 ; \
.endr
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */ #endif /* _ASM_X86_PGTABLE_64_H */

View File

@ -32,13 +32,6 @@
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
* because we need identity-mapped pages. * because we need identity-mapped pages.
*/ */
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
__HEAD __HEAD
.code64 .code64
@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
SYM_CODE_END(vc_no_ghcb) SYM_CODE_END(vc_no_ghcb)
#endif #endif
#define SYM_DATA_START_PAGE_ALIGNED(name) \
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/* /*
* Each PGD needs to be 8k long and 8k aligned. We do not * Each PGD needs to be 8k long and 8k aligned. We do not
@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb)
#define PTI_USER_PGD_FILL 0 #define PTI_USER_PGD_FILL 0
#endif #endif
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
.rept (COUNT) ; \
.quad (START) + (i << PMD_SHIFT) + (PERM) ; \
i = i + 1 ; \
.endr
__INITDATA __INITDATA
.balign 4 .balign 4
@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.endr .endr
SYM_DATA_END(level1_fixmap_pgt) SYM_DATA_END(level1_fixmap_pgt)
#undef PMDS
.data .data
.align 16 .align 16

View File

@ -7,6 +7,7 @@
.code32 .code32
.text .text
#define _pa(x) ((x) - __START_KERNEL_map) #define _pa(x) ((x) - __START_KERNEL_map)
#define rva(x) ((x) - pvh_start_xen)
#include <linux/elfnote.h> #include <linux/elfnote.h>
#include <linux/init.h> #include <linux/init.h>
@ -15,6 +16,7 @@
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/boot.h> #include <asm/boot.h>
#include <asm/pgtable.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
@ -54,7 +56,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
UNWIND_HINT_END_OF_STACK UNWIND_HINT_END_OF_STACK
cld cld
lgdt (_pa(gdt)) /*
* See the comment for startup_32 for more details. We need to
* execute a call to get the execution address to be position
* independent, but we don't have a stack. Save and restore the
* magic field of start_info in ebx, and use that as the stack.
*/
mov (%ebx), %eax
leal 4(%ebx), %esp
ANNOTATE_INTRA_FUNCTION_CALL
call 1f
1: popl %ebp
mov %eax, (%ebx)
subl $rva(1b), %ebp
movl $0, %esp
leal rva(gdt)(%ebp), %eax
leal rva(gdt_start)(%ebp), %ecx
movl %ecx, 2(%eax)
lgdt (%eax)
mov $PVH_DS_SEL,%eax mov $PVH_DS_SEL,%eax
mov %eax,%ds mov %eax,%ds
@ -62,14 +82,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
mov %eax,%ss mov %eax,%ss
/* Stash hvm_start_info. */ /* Stash hvm_start_info. */
mov $_pa(pvh_start_info), %edi leal rva(pvh_start_info)(%ebp), %edi
mov %ebx, %esi mov %ebx, %esi
mov _pa(pvh_start_info_sz), %ecx movl rva(pvh_start_info_sz)(%ebp), %ecx
shr $2,%ecx shr $2,%ecx
rep rep
movsl movsl
mov $_pa(early_stack_end), %esp leal rva(early_stack_end)(%ebp), %esp
/* Enable PAE mode. */ /* Enable PAE mode. */
mov %cr4, %eax mov %cr4, %eax
@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
btsl $_EFER_LME, %eax btsl $_EFER_LME, %eax
wrmsr wrmsr
mov %ebp, %ebx
subl $_pa(pvh_start_xen), %ebx /* offset */
jz .Lpagetable_done
/* Fixup page-tables for relocation. */
leal rva(pvh_init_top_pgt)(%ebp), %edi
movl $PTRS_PER_PGD, %ecx
2:
testl $_PAGE_PRESENT, 0x00(%edi)
jz 1f
addl %ebx, 0x00(%edi)
1:
addl $8, %edi
decl %ecx
jnz 2b
/* L3 ident has a single entry. */
leal rva(pvh_level3_ident_pgt)(%ebp), %edi
addl %ebx, 0x00(%edi)
leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
addl %ebx, (PAGE_SIZE - 16)(%edi)
addl %ebx, (PAGE_SIZE - 8)(%edi)
/* pvh_level2_ident_pgt is fine - large pages */
/* pvh_level2_kernel_pgt needs adjustment - large pages */
leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
movl $PTRS_PER_PMD, %ecx
2:
testl $_PAGE_PRESENT, 0x00(%edi)
jz 1f
addl %ebx, 0x00(%edi)
1:
addl $8, %edi
decl %ecx
jnz 2b
.Lpagetable_done:
/* Enable pre-constructed page tables. */ /* Enable pre-constructed page tables. */
mov $_pa(init_top_pgt), %eax leal rva(pvh_init_top_pgt)(%ebp), %eax
mov %eax, %cr3 mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0 mov %eax, %cr0
/* Jump to 64-bit mode. */ /* Jump to 64-bit mode. */
ljmp $PVH_CS_SEL, $_pa(1f) pushl $PVH_CS_SEL
leal rva(1f)(%ebp), %eax
pushl %eax
lretl
/* 64-bit entry point. */ /* 64-bit entry point. */
.code64 .code64
1: 1:
UNWIND_HINT_END_OF_STACK
/* Set base address in stack canary descriptor. */ /* Set base address in stack canary descriptor. */
mov $MSR_GS_BASE,%ecx mov $MSR_GS_BASE,%ecx
mov $_pa(canary), %eax leal canary(%rip), %eax
xor %edx, %edx xor %edx, %edx
wrmsr wrmsr
/*
* Calculate load offset and store in phys_base. __pa() needs
* phys_base set to calculate the hypercall page in xen_pvh_init().
*/
movq %rbp, %rbx
subq $_pa(pvh_start_xen), %rbx
movq %rbx, phys_base(%rip)
call xen_prepare_pvh call xen_prepare_pvh
/*
* Clear phys_base. __startup_64 will *add* to its value,
* so reset to 0.
*/
xor %rbx, %rbx
movq %rbx, phys_base(%rip)
/* startup_64 expects boot_params in %rsi. */ /* startup_64 expects boot_params in %rsi. */
mov $_pa(pvh_bootparams), %rsi lea pvh_bootparams(%rip), %rsi
mov $_pa(startup_64), %rax jmp startup_64
ANNOTATE_RETPOLINE_SAFE
jmp *%rax
#else /* CONFIG_X86_64 */ #else /* CONFIG_X86_64 */
@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen)
.balign 8 .balign 8
SYM_DATA_START_LOCAL(gdt) SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt_start .word gdt_end - gdt_start
.long _pa(gdt_start) .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
.word 0 .word 0
SYM_DATA_END(gdt) SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start) SYM_DATA_START_LOCAL(gdt_start)
@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
.fill BOOT_STACK_SIZE, 1, 0 .fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
#ifdef CONFIG_X86_64
/*
* Xen PVH needs a set of identity mapped and kernel high mapping
* page tables. pvh_start_xen starts running on the identity mapped
* page tables, but xen_prepare_pvh calls into the high mapping.
* These page tables need to be relocatable and are only used until
* startup_64 transitions to init_top_pgt.
*/
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(pvh_init_top_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.fill 511, 8, 0
SYM_DATA_END(pvh_level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
/*
* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*
* Note: This sets _PAGE_GLOBAL regardless of whether
* the CPU supports it or whether it is enabled.
* However, the CPU should ignore the bit.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(pvh_level2_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
.fill L3_START_KERNEL, 8, 0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.quad 0 /* no fixmap */
SYM_DATA_END(pvh_level3_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
/*
* Kernel high mapping.
*
* The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
* virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
* 512 MiB otherwise.
*
* (NOTE: after that starts the module area, see MODULES_VADDR.)
*
* This table is eventually used by the kernel during normal runtime.
* Care must be taken to clear out undesired bits later, like _PAGE_RW
* or _PAGE_GLOBAL in some cases.
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
SYM_DATA_END(pvh_level2_kernel_pgt)
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
.long CONFIG_PHYSICAL_ALIGN;
.long LOAD_PHYSICAL_ADDR;
.long KERNEL_IMAGE_SIZE - 1)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
_ASM_PTR (pvh_start_xen - __START_KERNEL_map)) _ASM_PTR (pvh_start_xen - __START_KERNEL_map))

View File

@ -4,6 +4,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <xen/hvc-console.h> #include <xen/hvc-console.h>
#include <xen/acpi.h>
#include <asm/bootparam.h> #include <asm/bootparam.h>
#include <asm/io_apic.h> #include <asm/io_apic.h>
@ -28,6 +29,28 @@
bool __ro_after_init xen_pvh; bool __ro_after_init xen_pvh;
EXPORT_SYMBOL_GPL(xen_pvh); EXPORT_SYMBOL_GPL(xen_pvh);
#ifdef CONFIG_XEN_DOM0
/*
 * Ask the hypervisor to set up @gsi via PHYSDEVOP_setup_gsi.
 *
 * @trigger and @polarity are ACPI values; they are translated to the 0/1
 * encoding of struct physdev_setup_gsi (0 for ACPI_EDGE_SENSITIVE /
 * ACPI_ACTIVE_HIGH, 1 for anything else).
 *
 * Returns 0 on success or when the hypervisor reports -EEXIST (the GSI is
 * already set up); otherwise returns the hypercall's error code.
 */
int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
{
	struct physdev_setup_gsi setup_gsi;
	int rc;

	setup_gsi.gsi = gsi;
	setup_gsi.triggering = (trigger != ACPI_EDGE_SENSITIVE);
	setup_gsi.polarity = (polarity != ACPI_ACTIVE_HIGH);

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
	if (!rc)
		return 0;

	/* A GSI that is already set up is not an error for the caller. */
	if (rc == -EEXIST) {
		xen_raw_printk("Already setup the GSI :%d\n", gsi);
		return 0;
	}

	xen_raw_printk("Fail to setup GSI (%d)!\n", gsi);
	return rc;
}
EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi);
#endif
/* /*
* Reserve e820 UNUSABLE regions to inflate the memory balloon. * Reserve e820 UNUSABLE regions to inflate the memory balloon.
* *

View File

@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
} }
#endif /* CONFIG_X86_IO_APIC */ #endif /* CONFIG_X86_IO_APIC */
static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
{ {
struct acpi_prt_entry *entry = NULL; struct acpi_prt_entry *entry = NULL;
struct pci_dev *bridge; struct pci_dev *bridge;

View File

@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
config XEN_PRIVCMD config XEN_PRIVCMD
tristate "Xen hypercall passthrough driver" tristate "Xen hypercall passthrough driver"
depends on XEN depends on XEN
imply XEN_PCIDEV_BACKEND
default m default m
help help
The hypercall passthrough driver allows privileged user programs to The hypercall passthrough driver allows privileged user programs to

View File

@ -30,6 +30,7 @@
* IN THE SOFTWARE. * IN THE SOFTWARE.
*/ */
#include <linux/pci.h>
#include <xen/acpi.h> #include <xen/acpi.h>
#include <xen/interface/platform.h> #include <xen/interface/platform.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
@ -75,3 +76,52 @@ int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state,
return xen_acpi_notify_hypervisor_state(sleep_state, val_a, return xen_acpi_notify_hypervisor_state(sleep_state, val_a,
val_b, true); val_b, true);
} }
/*
 * Layout of the entry returned by acpi_pci_irq_lookup(), as dereferenced
 * by xen_acpi_get_gsi_info() (which reads ->link and ->index).
 *
 * NOTE(review): this is a local definition; it presumably has to stay in
 * sync with the definition used by the code that implements
 * acpi_pci_irq_lookup() -- confirm and consider sharing one definition.
 */
struct acpi_prt_entry {
struct acpi_pci_id id;
u8 pin;
acpi_handle link;
u32 index;
};
/*
 * Look up the GSI, trigger mode and polarity for the interrupt pin of @dev.
 *
 * The pin is resolved through acpi_pci_irq_lookup().  If the resulting
 * entry has a link, the GSI comes from acpi_pci_link_allocate_irq() (which
 * may also update trigger/polarity); otherwise the entry's index is used
 * as the GSI directly.
 *
 * Defaults: level triggered; active high when the ACPI IRQ model is GIC,
 * active low otherwise.
 *
 * Returns 0 and fills the three output pointers on success.  Returns
 * -EINVAL if any pointer argument is NULL, the device has no interrupt
 * pin, no entry is found, or the resulting GSI is negative.
 */
int xen_acpi_get_gsi_info(struct pci_dev *dev,
			  int *gsi_out,
			  int *trigger_out,
			  int *polarity_out)
{
	struct acpi_prt_entry *prt;
	int trigger = ACPI_LEVEL_SENSITIVE;
	int polarity = ACPI_ACTIVE_LOW;
	int irq;
	u8 pin;

	if (acpi_irq_model == ACPI_IRQ_MODEL_GIC)
		polarity = ACPI_ACTIVE_HIGH;

	if (!dev || !gsi_out || !trigger_out || !polarity_out)
		return -EINVAL;

	pin = dev->pin;
	if (!pin)
		return -EINVAL;

	/*
	 * NOTE(review): the entry is never released here.  If
	 * acpi_pci_irq_lookup() hands back an allocation owned by the
	 * caller, this leaks it -- confirm against its implementation.
	 */
	prt = acpi_pci_irq_lookup(dev, pin);
	if (!prt)
		return -EINVAL;

	if (prt->link)
		irq = acpi_pci_link_allocate_irq(prt->link, prt->index,
						 &trigger, &polarity, NULL);
	else
		irq = prt->index;

	if (irq < 0)
		return -EINVAL;

	*gsi_out = irq;
	*trigger_out = trigger;
	*polarity_out = polarity;

	return 0;
}
EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info);

View File

@ -173,6 +173,19 @@ static int xen_remove_device(struct device *dev)
return r; return r;
} }
int xen_reset_device(const struct pci_dev *dev)
{
struct pci_device_reset device = {
.dev.seg = pci_domain_nr(dev->bus),
.dev.bus = dev->bus->number,
.dev.devfn = dev->devfn,
.flags = PCI_DEVICE_RESET_FLR,
};
return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device);
}
EXPORT_SYMBOL_GPL(xen_reset_device);
static int xen_pci_notifier(struct notifier_block *nb, static int xen_pci_notifier(struct notifier_block *nb,
unsigned long action, void *data) unsigned long action, void *data)
{ {

View File

@ -46,6 +46,9 @@
#include <xen/page.h> #include <xen/page.h>
#include <xen/xen-ops.h> #include <xen/xen-ops.h>
#include <xen/balloon.h> #include <xen/balloon.h>
#ifdef CONFIG_XEN_ACPI
#include <xen/acpi.h>
#endif
#include "privcmd.h" #include "privcmd.h"
@ -844,6 +847,31 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
return rc; return rc;
} }
/*
 * IOCTL_PRIVCMD_PCIDEV_GET_GSI handler.
 *
 * Copies a struct privcmd_pcidev_get_gsi from user space, resolves its
 * sbdf to a GSI through pcistub_get_gsi_from_sbdf() and copies the
 * structure back with the gsi field filled in.
 *
 * Returns 0 on success, -EFAULT if either user copy fails, the negative
 * value from the lookup if it fails, and -EINVAL when CONFIG_XEN_ACPI is
 * not set or the PCI backend is not reachable from this code.
 */
static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata)
{
#if defined(CONFIG_XEN_ACPI)
	struct privcmd_pcidev_get_gsi req;
	int gsi = -EINVAL;

	if (copy_from_user(&req, udata, sizeof(req)))
		return -EFAULT;

	/* Without a reachable backend the -EINVAL default is returned. */
	if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND))
		gsi = pcistub_get_gsi_from_sbdf(req.sbdf);

	if (gsi < 0)
		return gsi;

	req.gsi = gsi;
	if (copy_to_user(udata, &req, sizeof(req)))
		return -EFAULT;

	return 0;
#else
	return -EINVAL;
#endif
}
#ifdef CONFIG_XEN_PRIVCMD_EVENTFD #ifdef CONFIG_XEN_PRIVCMD_EVENTFD
/* Irqfd support */ /* Irqfd support */
static struct workqueue_struct *irqfd_cleanup_wq; static struct workqueue_struct *irqfd_cleanup_wq;
@ -1543,6 +1571,10 @@ static long privcmd_ioctl(struct file *file,
ret = privcmd_ioctl_ioeventfd(file, udata); ret = privcmd_ioctl_ioeventfd(file, udata);
break; break;
case IOCTL_PRIVCMD_PCIDEV_GET_GSI:
ret = privcmd_ioctl_pcidev_get_gsi(file, udata);
break;
default: default:
break; break;
} }

View File

@ -122,7 +122,7 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
if (err) if (err)
goto out; goto out;
new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS; new_value &= PM_OK_BITS;
if ((old_value & PM_OK_BITS) != new_value) { if ((old_value & PM_OK_BITS) != new_value) {

View File

@ -21,6 +21,9 @@
#include <xen/events.h> #include <xen/events.h>
#include <xen/pci.h> #include <xen/pci.h>
#include <xen/xen.h> #include <xen/xen.h>
#ifdef CONFIG_XEN_ACPI
#include <xen/acpi.h>
#endif
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <xen/interface/physdev.h> #include <xen/interface/physdev.h>
#include "pciback.h" #include "pciback.h"
@ -53,6 +56,9 @@ struct pcistub_device {
struct pci_dev *dev; struct pci_dev *dev;
struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
#ifdef CONFIG_XEN_ACPI
int gsi;
#endif
}; };
/* Access to pcistub_devices & seized_devices lists and the initialize_devices /* Access to pcistub_devices & seized_devices lists and the initialize_devices
@ -85,10 +91,23 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
kref_init(&psdev->kref); kref_init(&psdev->kref);
spin_lock_init(&psdev->lock); spin_lock_init(&psdev->lock);
#ifdef CONFIG_XEN_ACPI
psdev->gsi = -1;
#endif
return psdev; return psdev;
} }
/*
 * Reset @dev with __pci_reset_function_locked() and, unless running as a
 * PV domain, report the reset to Xen through xen_reset_device().
 *
 * Returns 0 in a PV domain, otherwise the result of xen_reset_device().
 * The return value of __pci_reset_function_locked() is not examined.
 */
static int pcistub_reset_device_state(struct pci_dev *dev)
{
	__pci_reset_function_locked(dev);

	if (xen_pv_domain())
		return 0;

	return xen_reset_device(dev);
}
/* Don't call this directly as it's called by pcistub_device_put */ /* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref) static void pcistub_device_release(struct kref *kref)
{ {
@ -107,7 +126,7 @@ static void pcistub_device_release(struct kref *kref)
/* Call the reset function which does not take lock as this /* Call the reset function which does not take lock as this
* is called from "unbind" which takes a device_lock mutex. * is called from "unbind" which takes a device_lock mutex.
*/ */
__pci_reset_function_locked(dev); pcistub_reset_device_state(dev);
if (dev_data && if (dev_data &&
pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
dev_info(&dev->dev, "Could not reload PCI state\n"); dev_info(&dev->dev, "Could not reload PCI state\n");
@ -207,6 +226,25 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
return pci_dev; return pci_dev;
} }
#ifdef CONFIG_XEN_ACPI
/*
 * Return the GSI recorded for the stub device identified by @sbdf.
 *
 * @sbdf carries the PCI domain in bits 31-16; bus, slot and function are
 * extracted from it with PCI_BUS_NUM(), PCI_SLOT() and PCI_FUNC().
 *
 * Returns the device's stored gsi value (initialised to -1 when the stub
 * device is allocated), or -ENODEV if no matching stub device is found.
 *
 * NOTE(review): if pcistub_device_find() returns the device with a
 * reference held, that reference is not dropped here -- confirm.
 */
int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
{
	int seg = (sbdf >> 16) & 0xffff;
	int busnr = PCI_BUS_NUM(sbdf);
	int devnr = PCI_SLOT(sbdf);
	int fn = PCI_FUNC(sbdf);
	struct pcistub_device *found;

	found = pcistub_device_find(seg, busnr, devnr, fn);
	if (found)
		return found->gsi;

	return -ENODEV;
}
EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf);
#endif
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
int domain, int bus, int domain, int bus,
int slot, int func) int slot, int func)
@ -284,7 +322,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* (so it's ready for the next domain) * (so it's ready for the next domain)
*/ */
device_lock_assert(&dev->dev); device_lock_assert(&dev->dev);
__pci_reset_function_locked(dev); pcistub_reset_device_state(dev);
dev_data = pci_get_drvdata(dev); dev_data = pci_get_drvdata(dev);
ret = pci_load_saved_state(dev, dev_data->pci_saved_state); ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
@ -354,11 +392,20 @@ static int pcistub_match(struct pci_dev *dev)
return found; return found;
} }
static int pcistub_init_device(struct pci_dev *dev) static int pcistub_init_device(struct pcistub_device *psdev)
{ {
struct xen_pcibk_dev_data *dev_data; struct xen_pcibk_dev_data *dev_data;
struct pci_dev *dev;
#ifdef CONFIG_XEN_ACPI
int gsi, trigger, polarity;
#endif
int err = 0; int err = 0;
if (!psdev)
return -EINVAL;
dev = psdev->dev;
dev_dbg(&dev->dev, "initializing...\n"); dev_dbg(&dev->dev, "initializing...\n");
/* The PCI backend is not intended to be a module (or to work with /* The PCI backend is not intended to be a module (or to work with
@ -420,9 +467,26 @@ static int pcistub_init_device(struct pci_dev *dev)
dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
else { else {
dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n"); dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
__pci_reset_function_locked(dev); err = pcistub_reset_device_state(dev);
if (err)
goto config_release;
pci_restore_state(dev); pci_restore_state(dev);
} }
#ifdef CONFIG_XEN_ACPI
if (xen_initial_domain() && xen_pvh_domain()) {
err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity);
if (err) {
dev_err(&dev->dev, "Fail to get gsi info!\n");
goto config_release;
}
err = xen_pvh_setup_gsi(gsi, trigger, polarity);
if (err)
goto config_release;
psdev->gsi = gsi;
}
#endif
/* Now disable the device (this also ensures some private device /* Now disable the device (this also ensures some private device
* data is setup before we export) * data is setup before we export)
*/ */
@ -462,7 +526,7 @@ static int __init pcistub_init_devices_late(void)
spin_unlock_irqrestore(&pcistub_devices_lock, flags); spin_unlock_irqrestore(&pcistub_devices_lock, flags);
err = pcistub_init_device(psdev->dev); err = pcistub_init_device(psdev);
if (err) { if (err) {
dev_err(&psdev->dev->dev, dev_err(&psdev->dev->dev,
"error %d initializing device\n", err); "error %d initializing device\n", err);
@ -532,7 +596,7 @@ static int pcistub_seize(struct pci_dev *dev,
spin_unlock_irqrestore(&pcistub_devices_lock, flags); spin_unlock_irqrestore(&pcistub_devices_lock, flags);
/* don't want irqs disabled when calling pcistub_init_device */ /* don't want irqs disabled when calling pcistub_init_device */
err = pcistub_init_device(psdev->dev); err = pcistub_init_device(psdev);
spin_lock_irqsave(&pcistub_devices_lock, flags); spin_lock_irqsave(&pcistub_devices_lock, flags);
@ -757,7 +821,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
} }
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
res = (pci_ers_result_t)aer_op->err; res = (__force pci_ers_result_t)aer_op->err;
return res; return res;
} }

View File

@ -363,6 +363,7 @@ void acpi_unregister_gsi (u32 gsi);
struct pci_dev; struct pci_dev;
struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin);
int acpi_pci_irq_enable (struct pci_dev *dev); int acpi_pci_irq_enable (struct pci_dev *dev);
void acpi_penalize_isa_irq(int irq, int active); void acpi_penalize_isa_irq(int irq, int active);
bool acpi_isa_irq_available(int irq); bool acpi_isa_irq_available(int irq);

View File

@ -126,6 +126,11 @@ struct privcmd_ioeventfd {
__u8 pad[2]; __u8 pad[2];
}; };
/*
 * Argument of IOCTL_PRIVCMD_PCIDEV_GET_GSI.
 *
 * sbdf: input; identifies the PCI device.  The kernel side takes the
 *       domain from bits 31-16 and decodes the remainder with
 *       PCI_BUS_NUM(), PCI_SLOT() and PCI_FUNC().
 * gsi:  output; filled in by the kernel when the ioctl succeeds.
 */
struct privcmd_pcidev_get_gsi {
__u32 sbdf;
__u32 gsi;
};
/* /*
* @cmd: IOCTL_PRIVCMD_HYPERCALL * @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t * @arg: &privcmd_hypercall_t
@ -157,5 +162,7 @@ struct privcmd_ioeventfd {
_IOW('P', 8, struct privcmd_irqfd) _IOW('P', 8, struct privcmd_irqfd)
#define IOCTL_PRIVCMD_IOEVENTFD \ #define IOCTL_PRIVCMD_IOEVENTFD \
_IOW('P', 9, struct privcmd_ioeventfd) _IOW('P', 9, struct privcmd_ioeventfd)
#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \
_IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi))
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

View File

@ -67,10 +67,37 @@ static inline void xen_acpi_sleep_register(void)
acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
} }
} }
int xen_pvh_setup_gsi(int gsi, int trigger, int polarity);
int xen_acpi_get_gsi_info(struct pci_dev *dev,
int *gsi_out,
int *trigger_out,
int *polarity_out);
#else #else
static inline void xen_acpi_sleep_register(void) static inline void xen_acpi_sleep_register(void)
{ {
} }
/*
 * Fallback used on the #else side of this header's configuration check:
 * no GSI setup is attempted and the call always fails with -1.
 */
static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
{
return -1;
}
/*
 * Fallback used on the #else side of this header's configuration check:
 * the output pointers are left untouched and the call always fails
 * with -1.
 */
static inline int xen_acpi_get_gsi_info(struct pci_dev *dev,
int *gsi_out,
int *trigger_out,
int *polarity_out)
{
return -1;
}
#endif
#ifdef CONFIG_XEN_PCI_STUB
int pcistub_get_gsi_from_sbdf(unsigned int sbdf);
#else
/*
 * Fallback when CONFIG_XEN_PCI_STUB is not defined: there is no stub
 * device table to search, so the lookup always fails with -1.
 */
static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
{
return -1;
}
#endif #endif
#endif /* _XEN_ACPI_H */ #endif /* _XEN_ACPI_H */

View File

@ -11,7 +11,9 @@
#define __XEN_PUBLIC_ELFNOTE_H__ #define __XEN_PUBLIC_ELFNOTE_H__
/* /*
* The notes should live in a SHT_NOTE segment and have "Xen" in the * `incontents 200 elfnotes ELF notes
*
* The notes should live in a PT_NOTE segment and have "Xen" in the
* name field. * name field.
* *
* Numeric types are either 4 or 8 bytes depending on the content of * Numeric types are either 4 or 8 bytes depending on the content of
@ -22,6 +24,8 @@
* *
* String values (for non-legacy) are NULL terminated ASCII, also known * String values (for non-legacy) are NULL terminated ASCII, also known
* as ASCIZ type. * as ASCIZ type.
*
* Xen only uses ELF Notes contained in x86 binaries.
*/ */
/* /*
@ -52,7 +56,7 @@
#define XEN_ELFNOTE_VIRT_BASE 3 #define XEN_ELFNOTE_VIRT_BASE 3
/* /*
* The offset of the ELF paddr field from the acutal required * The offset of the ELF paddr field from the actual required
* pseudo-physical address (numeric). * pseudo-physical address (numeric).
* *
* This is used to maintain backwards compatibility with older kernels * This is used to maintain backwards compatibility with older kernels
@ -92,7 +96,12 @@
#define XEN_ELFNOTE_LOADER 8 #define XEN_ELFNOTE_LOADER 8
/* /*
* The kernel supports PAE (x86/32 only, string = "yes" or "no"). * The kernel supports PAE (x86/32 only, string = "yes", "no" or
* "bimodal").
*
* For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
* may be given as "yes,bimodal" which will cause older Xen to treat
* this kernel as PAE.
* *
* LEGACY: PAE (n.b. The legacy interface included a provision to * LEGACY: PAE (n.b. The legacy interface included a provision to
* indicate 'extended-cr3' support allowing L3 page tables to be * indicate 'extended-cr3' support allowing L3 page tables to be
@ -149,7 +158,9 @@
* The (non-default) location the initial phys-to-machine map should be * The (non-default) location the initial phys-to-machine map should be
* placed at by the hypervisor (Dom0) or the tools (DomU). * placed at by the hypervisor (Dom0) or the tools (DomU).
* The kernel must be prepared for this mapping to be established using * The kernel must be prepared for this mapping to be established using
* large pages, despite such otherwise not being available to guests. * large pages, despite such otherwise not being available to guests. Note
* that these large pages may be misaligned in PFN space (they'll obviously
* be aligned in MFN and virtual address spaces).
* The kernel must also be able to handle the page table pages used for * The kernel must also be able to handle the page table pages used for
* this mapping not being accessible through the initial mapping. * this mapping not being accessible through the initial mapping.
* (Only x86-64 supports this at present.) * (Only x86-64 supports this at present.)
@ -185,9 +196,81 @@
*/ */
#define XEN_ELFNOTE_PHYS32_ENTRY 18 #define XEN_ELFNOTE_PHYS32_ENTRY 18
/*
* Physical loading constraints for PVH kernels
*
* The presence of this note indicates the kernel supports relocating itself.
*
* The note may include up to three 32bit values to place constraints on the
* guest physical loading addresses and alignment for a PVH kernel. Values
* are read in the following order:
* - a required start alignment (default 0x200000)
* - a minimum address for the start of the image (default 0; see below)
* - a maximum address for the last byte of the image (default 0xffffffff)
*
* When this note specifies an alignment value, it is used. Otherwise the
* maximum p_align value from loadable ELF Program Headers is used, if it is
* greater than or equal to 4k (0x1000). Otherwise, the default is used.
*/
#define XEN_ELFNOTE_PHYS32_RELOC 19
/* /*
* The number of the highest elfnote defined. * The number of the highest elfnote defined.
*/ */
#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC
/*
* System information exported through crash notes.
*
* The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
* note in case of a system crash. This note will contain various
* information about the system, see xen/include/xen/elfcore.h.
*/
#define XEN_ELFNOTE_CRASH_INFO 0x1000001
/*
* System registers exported through crash notes.
*
* The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
* note per cpu in case of a system crash. This note is architecture
* specific and will contain registers not saved in the "CORE" note.
* See xen/include/xen/elfcore.h for more information.
*/
#define XEN_ELFNOTE_CRASH_REGS 0x1000002
/*
* xen dump-core none note.
* xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
* in its dump file to indicate that the file is xen dump-core
* file. This note doesn't have any other information.
* See tools/libxc/xc_core.h for more information.
*/
#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
/*
* xen dump-core header note.
* xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
* in its dump file.
* See tools/libxc/xc_core.h for more information.
*/
#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
/*
* xen dump-core xen version note.
* xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
* in its dump file. It contains the xen version obtained via the
* XENVER hypercall.
* See tools/libxc/xc_core.h for more information.
*/
#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
/*
* xen dump-core format version note.
* xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
* in its dump file. It contains a format version identifier.
* See tools/libxc/xc_core.h for more information.
*/
#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ #endif /* __XEN_PUBLIC_ELFNOTE_H__ */

View File

@ -256,6 +256,13 @@ struct physdev_pci_device_add {
*/ */
#define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_prepare_msix 30
#define PHYSDEVOP_release_msix 31 #define PHYSDEVOP_release_msix 31
/*
* Notify the hypervisor that a PCI device has been reset, so that any
* internally cached state is regenerated. Should be called after any
* device reset performed by the hardware domain.
*/
#define PHYSDEVOP_pci_device_reset 32
struct physdev_pci_device { struct physdev_pci_device {
/* IN */ /* IN */
uint16_t seg; uint16_t seg;
@ -263,6 +270,16 @@ struct physdev_pci_device {
uint8_t devfn; uint8_t devfn;
}; };
/*
 * Argument of PHYSDEVOP_pci_device_reset.
 *
 * dev:   the PCI device (segment, bus, devfn) that has been reset.
 * flags: one of the PCI_DEVICE_RESET_* values below, selected with
 *        PCI_DEVICE_RESET_MASK.
 *        NOTE(review): judging by the names these describe the kind of
 *        reset that was performed -- confirm against the Xen interface
 *        documentation.
 */
struct pci_device_reset {
struct physdev_pci_device dev;
#define PCI_DEVICE_RESET_COLD 0x0
#define PCI_DEVICE_RESET_WARM 0x1
#define PCI_DEVICE_RESET_HOT 0x2
#define PCI_DEVICE_RESET_FLR 0x3
#define PCI_DEVICE_RESET_MASK 0x3
uint32_t flags;
};
#define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_PREPARE 1
#define PHYSDEVOP_DBGP_RESET_DONE 2 #define PHYSDEVOP_DBGP_RESET_DONE 2

View File

@ -4,10 +4,16 @@
#define __XEN_PCI_H__ #define __XEN_PCI_H__
#if defined(CONFIG_XEN_DOM0) #if defined(CONFIG_XEN_DOM0)
int xen_reset_device(const struct pci_dev *dev);
int xen_find_device_domain_owner(struct pci_dev *dev); int xen_find_device_domain_owner(struct pci_dev *dev);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
int xen_unregister_device_domain_owner(struct pci_dev *dev); int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else #else
/*
 * Fallback when CONFIG_XEN_DOM0 is not defined: no hypercall is made and
 * the call always fails with -1.
 */
static inline int xen_reset_device(const struct pci_dev *dev)
{
return -1;
}
static inline int xen_find_device_domain_owner(struct pci_dev *dev) static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{ {
return -1; return -1;