Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:
"A second round of Xen related changes and features:

- a small fix of the xen-pciback driver for a warning issued by
sparse

- support PCI passthrough when using a PVH dom0

- enable loading the kernel in PVH mode at arbitrary addresses,
avoiding conflicts with the memory map when running as a Xen dom0
using the host memory layout"

* tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
x86/pvh: Add 64bit relocation page tables
x86/kernel: Move page table macros to header
x86/pvh: Set phys_base when calling xen_prepare_pvh()
x86/pvh: Make PVH entrypoint PIC for x86-64
xen: sync elfnote.h from xen tree
xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t
xen/privcmd: Add new syscall to get gsi from dev
xen/pvh: Setup gsi for passthrough device
xen/pci: Add a function to reset device for xen

+509 -47
+22 -1
arch/x86/include/asm/pgtable_64.h
··· 270 270 271 271 #include <asm/pgtable-invert.h> 272 272 273 - #endif /* !__ASSEMBLY__ */ 273 + #else /* __ASSEMBLY__ */ 274 + 275 + #define l4_index(x) (((x) >> 39) & 511) 276 + #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) 277 + 278 + L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) 279 + L4_START_KERNEL = l4_index(__START_KERNEL_map) 280 + 281 + L3_START_KERNEL = pud_index(__START_KERNEL_map) 282 + 283 + #define SYM_DATA_START_PAGE_ALIGNED(name) \ 284 + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) 285 + 286 + /* Automate the creation of 1 to 1 mapping pmd entries */ 287 + #define PMDS(START, PERM, COUNT) \ 288 + i = 0 ; \ 289 + .rept (COUNT) ; \ 290 + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ 291 + i = i + 1 ; \ 292 + .endr 293 + 294 + #endif /* __ASSEMBLY__ */ 274 295 #endif /* _ASM_X86_PGTABLE_64_H */
-20
arch/x86/kernel/head_64.S
··· 32 32 * We are not able to switch in one step to the final KERNEL ADDRESS SPACE 33 33 * because we need identity-mapped pages. 34 34 */ 35 - #define l4_index(x) (((x) >> 39) & 511) 36 - #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 37 - 38 - L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) 39 - L4_START_KERNEL = l4_index(__START_KERNEL_map) 40 - 41 - L3_START_KERNEL = pud_index(__START_KERNEL_map) 42 35 43 36 __HEAD 44 37 .code64 ··· 570 577 SYM_CODE_END(vc_no_ghcb) 571 578 #endif 572 579 573 - #define SYM_DATA_START_PAGE_ALIGNED(name) \ 574 - SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) 575 - 576 580 #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION 577 581 /* 578 582 * Each PGD needs to be 8k long and 8k aligned. We do not ··· 590 600 SYM_DATA_START_PAGE_ALIGNED(name) 591 601 #define PTI_USER_PGD_FILL 0 592 602 #endif 593 - 594 - /* Automate the creation of 1 to 1 mapping pmd entries */ 595 - #define PMDS(START, PERM, COUNT) \ 596 - i = 0 ; \ 597 - .rept (COUNT) ; \ 598 - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ 599 - i = i + 1 ; \ 600 - .endr 601 603 602 604 __INITDATA 603 605 .balign 4 ··· 689 707 .fill 512,8,0 690 708 .endr 691 709 SYM_DATA_END(level1_fixmap_pgt) 692 - 693 - #undef PMDS 694 710 695 711 .data 696 712 .align 16
+149 -12
arch/x86/platform/pvh/head.S
··· 7 7 .code32 8 8 .text 9 9 #define _pa(x) ((x) - __START_KERNEL_map) 10 + #define rva(x) ((x) - pvh_start_xen) 10 11 11 12 #include <linux/elfnote.h> 12 13 #include <linux/init.h> ··· 16 15 #include <asm/segment.h> 17 16 #include <asm/asm.h> 18 17 #include <asm/boot.h> 18 + #include <asm/pgtable.h> 19 19 #include <asm/processor-flags.h> 20 20 #include <asm/msr.h> 21 21 #include <asm/nospec-branch.h> ··· 56 54 UNWIND_HINT_END_OF_STACK 57 55 cld 58 56 59 - lgdt (_pa(gdt)) 57 + /* 58 + * See the comment for startup_32 for more details. We need to 59 + * execute a call to get the execution address to be position 60 + * independent, but we don't have a stack. Save and restore the 61 + * magic field of start_info in ebx, and use that as the stack. 62 + */ 63 + mov (%ebx), %eax 64 + leal 4(%ebx), %esp 65 + ANNOTATE_INTRA_FUNCTION_CALL 66 + call 1f 67 + 1: popl %ebp 68 + mov %eax, (%ebx) 69 + subl $rva(1b), %ebp 70 + movl $0, %esp 71 + 72 + leal rva(gdt)(%ebp), %eax 73 + leal rva(gdt_start)(%ebp), %ecx 74 + movl %ecx, 2(%eax) 75 + lgdt (%eax) 60 76 61 77 mov $PVH_DS_SEL,%eax 62 78 mov %eax,%ds ··· 82 62 mov %eax,%ss 83 63 84 64 /* Stash hvm_start_info. */ 85 - mov $_pa(pvh_start_info), %edi 65 + leal rva(pvh_start_info)(%ebp), %edi 86 66 mov %ebx, %esi 87 - mov _pa(pvh_start_info_sz), %ecx 67 + movl rva(pvh_start_info_sz)(%ebp), %ecx 88 68 shr $2,%ecx 89 69 rep 90 70 movsl 91 71 92 - mov $_pa(early_stack_end), %esp 72 + leal rva(early_stack_end)(%ebp), %esp 93 73 94 74 /* Enable PAE mode. */ 95 75 mov %cr4, %eax ··· 103 83 btsl $_EFER_LME, %eax 104 84 wrmsr 105 85 86 + mov %ebp, %ebx 87 + subl $_pa(pvh_start_xen), %ebx /* offset */ 88 + jz .Lpagetable_done 89 + 90 + /* Fixup page-tables for relocation. */ 91 + leal rva(pvh_init_top_pgt)(%ebp), %edi 92 + movl $PTRS_PER_PGD, %ecx 93 + 2: 94 + testl $_PAGE_PRESENT, 0x00(%edi) 95 + jz 1f 96 + addl %ebx, 0x00(%edi) 97 + 1: 98 + addl $8, %edi 99 + decl %ecx 100 + jnz 2b 101 + 102 + /* L3 ident has a single entry. */ 103 + leal rva(pvh_level3_ident_pgt)(%ebp), %edi 104 + addl %ebx, 0x00(%edi) 105 + 106 + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi 107 + addl %ebx, (PAGE_SIZE - 16)(%edi) 108 + addl %ebx, (PAGE_SIZE - 8)(%edi) 109 + 110 + /* pvh_level2_ident_pgt is fine - large pages */ 111 + 112 + /* pvh_level2_kernel_pgt needs adjustment - large pages */ 113 + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi 114 + movl $PTRS_PER_PMD, %ecx 115 + 2: 116 + testl $_PAGE_PRESENT, 0x00(%edi) 117 + jz 1f 118 + addl %ebx, 0x00(%edi) 119 + 1: 120 + addl $8, %edi 121 + decl %ecx 122 + jnz 2b 123 + 124 + .Lpagetable_done: 106 125 /* Enable pre-constructed page tables. */ 107 - mov $_pa(init_top_pgt), %eax 126 + leal rva(pvh_init_top_pgt)(%ebp), %eax 108 127 mov %eax, %cr3 109 128 mov $(X86_CR0_PG | X86_CR0_PE), %eax 110 129 mov %eax, %cr0 111 130 112 131 /* Jump to 64-bit mode. */ 113 - ljmp $PVH_CS_SEL, $_pa(1f) 132 + pushl $PVH_CS_SEL 133 + leal rva(1f)(%ebp), %eax 134 + pushl %eax 135 + lretl 114 136 115 137 /* 64-bit entry point. */ 116 138 .code64 117 139 1: 140 + UNWIND_HINT_END_OF_STACK 141 + 118 142 /* Set base address in stack canary descriptor. */ 119 143 mov $MSR_GS_BASE,%ecx 120 - mov $_pa(canary), %eax 144 + leal canary(%rip), %eax 121 145 xor %edx, %edx 122 146 wrmsr 123 147 148 + /* 149 + * Calculate load offset and store in phys_base. __pa() needs 150 + * phys_base set to calculate the hypercall page in xen_pvh_init(). 151 + */ 152 + movq %rbp, %rbx 153 + subq $_pa(pvh_start_xen), %rbx 154 + movq %rbx, phys_base(%rip) 124 155 call xen_prepare_pvh 156 + /* 157 + * Clear phys_base. __startup_64 will *add* to its value, 158 + * so reset to 0. 159 + */ 160 + xor %rbx, %rbx 161 + movq %rbx, phys_base(%rip) 125 162 126 163 /* startup_64 expects boot_params in %rsi. */ 127 - mov $_pa(pvh_bootparams), %rsi 128 - mov $_pa(startup_64), %rax 129 - ANNOTATE_RETPOLINE_SAFE 130 - jmp *%rax 164 + lea pvh_bootparams(%rip), %rsi 165 + jmp startup_64 131 166 132 167 #else /* CONFIG_X86_64 */ 133 168 ··· 218 143 .balign 8 219 144 SYM_DATA_START_LOCAL(gdt) 220 145 .word gdt_end - gdt_start 221 - .long _pa(gdt_start) 146 + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ 222 147 .word 0 223 148 SYM_DATA_END(gdt) 224 149 SYM_DATA_START_LOCAL(gdt_start) ··· 237 162 SYM_DATA_START_LOCAL(early_stack) 238 163 .fill BOOT_STACK_SIZE, 1, 0 239 164 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) 165 + 166 + #ifdef CONFIG_X86_64 167 + /* 168 + * Xen PVH needs a set of identity mapped and kernel high mapping 169 + * page tables. pvh_start_xen starts running on the identity mapped 170 + * page tables, but xen_prepare_pvh calls into the high mapping. 171 + * These page tables need to be relocatable and are only used until 172 + * startup_64 transitions to init_top_pgt. 173 + */ 174 + SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) 175 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 176 + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 177 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 178 + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 179 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 180 + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC 181 + SYM_DATA_END(pvh_init_top_pgt) 182 + 183 + SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) 184 + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 185 + .fill 511, 8, 0 186 + SYM_DATA_END(pvh_level3_ident_pgt) 187 + SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) 188 + /* 189 + * Since I easily can, map the first 1G. 190 + * Don't set NX because code runs from these pages. 191 + * 192 + * Note: This sets _PAGE_GLOBAL despite whether 193 + * the CPU supports it or it is enabled. But, 194 + * the CPU should ignore the bit. 195 + */ 196 + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) 197 + SYM_DATA_END(pvh_level2_ident_pgt) 198 + SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) 199 + .fill L3_START_KERNEL, 8, 0 200 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 201 + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC 202 + .quad 0 /* no fixmap */ 203 + SYM_DATA_END(pvh_level3_kernel_pgt) 204 + 205 + SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) 206 + /* 207 + * Kernel high mapping. 208 + * 209 + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in 210 + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, 211 + * 512 MiB otherwise. 212 + * 213 + * (NOTE: after that starts the module area, see MODULES_VADDR.) 214 + * 215 + * This table is eventually used by the kernel during normal runtime. 216 + * Care must be taken to clear out undesired bits later, like _PAGE_RW 217 + * or _PAGE_GLOBAL in some cases. 218 + */ 219 + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) 220 + SYM_DATA_END(pvh_level2_kernel_pgt) 221 + 222 + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, 223 + .long CONFIG_PHYSICAL_ALIGN; 224 + .long LOAD_PHYSICAL_ADDR; 225 + .long KERNEL_IMAGE_SIZE - 1) 226 + #endif 240 227 241 228 ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, 242 229 _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
+23
arch/x86/xen/enlighten_pvh.c
··· 4 4 #include <linux/mm.h> 5 5 6 6 #include <xen/hvc-console.h> 7 + #include <xen/acpi.h> 7 8 8 9 #include <asm/bootparam.h> 9 10 #include <asm/io_apic.h> ··· 28 27 */ 29 28 bool __ro_after_init xen_pvh; 30 29 EXPORT_SYMBOL_GPL(xen_pvh); 30 + 31 + #ifdef CONFIG_XEN_DOM0 32 + int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) 33 + { 34 + int ret; 35 + struct physdev_setup_gsi setup_gsi; 36 + 37 + setup_gsi.gsi = gsi; 38 + setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1); 39 + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); 40 + 41 + ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); 42 + if (ret == -EEXIST) { 43 + xen_raw_printk("Already setup the GSI :%d\n", gsi); 44 + ret = 0; 45 + } else if (ret) 46 + xen_raw_printk("Fail to setup GSI (%d)!\n", gsi); 47 + 48 + return ret; 49 + } 50 + EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi); 51 + #endif 31 52 32 53 /* 33 54 * Reserve e820 UNUSABLE regions to inflate the memory balloon.
+1 -1
drivers/acpi/pci_irq.c
··· 288 288 } 289 289 #endif /* CONFIG_X86_IO_APIC */ 290 290 291 - static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) 291 + struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) 292 292 { 293 293 struct acpi_prt_entry *entry = NULL; 294 294 struct pci_dev *bridge;
+1
drivers/xen/Kconfig
··· 261 261 config XEN_PRIVCMD 262 262 tristate "Xen hypercall passthrough driver" 263 263 depends on XEN 264 + imply CONFIG_XEN_PCIDEV_BACKEND 264 265 default m 265 266 help 266 267 The hypercall passthrough driver allows privileged user programs to
+50
drivers/xen/acpi.c
··· 30 30 * IN THE SOFTWARE. 31 31 */ 32 32 33 + #include <linux/pci.h> 33 34 #include <xen/acpi.h> 34 35 #include <xen/interface/platform.h> 35 36 #include <asm/xen/hypercall.h> ··· 76 75 return xen_acpi_notify_hypervisor_state(sleep_state, val_a, 77 76 val_b, true); 78 77 } 78 + 79 + struct acpi_prt_entry { 80 + struct acpi_pci_id id; 81 + u8 pin; 82 + acpi_handle link; 83 + u32 index; 84 + }; 85 + 86 + int xen_acpi_get_gsi_info(struct pci_dev *dev, 87 + int *gsi_out, 88 + int *trigger_out, 89 + int *polarity_out) 90 + { 91 + int gsi; 92 + u8 pin; 93 + struct acpi_prt_entry *entry; 94 + int trigger = ACPI_LEVEL_SENSITIVE; 95 + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? 96 + ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; 97 + 98 + if (!dev || !gsi_out || !trigger_out || !polarity_out) 99 + return -EINVAL; 100 + 101 + pin = dev->pin; 102 + if (!pin) 103 + return -EINVAL; 104 + 105 + entry = acpi_pci_irq_lookup(dev, pin); 106 + if (entry) { 107 + if (entry->link) 108 + gsi = acpi_pci_link_allocate_irq(entry->link, 109 + entry->index, 110 + &trigger, &polarity, 111 + NULL); 112 + else 113 + gsi = entry->index; 114 + } else 115 + gsi = -1; 116 + 117 + if (gsi < 0) 118 + return -EINVAL; 119 + 120 + *gsi_out = gsi; 121 + *trigger_out = trigger; 122 + *polarity_out = polarity; 123 + 124 + return 0; 125 + } 126 + EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info);
+13
drivers/xen/pci.c
··· 173 173 return r; 174 174 } 175 175 176 + int xen_reset_device(const struct pci_dev *dev) 177 + { 178 + struct pci_device_reset device = { 179 + .dev.seg = pci_domain_nr(dev->bus), 180 + .dev.bus = dev->bus->number, 181 + .dev.devfn = dev->devfn, 182 + .flags = PCI_DEVICE_RESET_FLR, 183 + }; 184 + 185 + return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device); 186 + } 187 + EXPORT_SYMBOL_GPL(xen_reset_device); 188 + 176 189 static int xen_pci_notifier(struct notifier_block *nb, 177 190 unsigned long action, void *data) 178 191 {
+32
drivers/xen/privcmd.c
··· 46 46 #include <xen/page.h> 47 47 #include <xen/xen-ops.h> 48 48 #include <xen/balloon.h> 49 + #ifdef CONFIG_XEN_ACPI 50 + #include <xen/acpi.h> 51 + #endif 49 52 50 53 #include "privcmd.h" 51 54 ··· 847 844 return rc; 848 845 } 849 846 847 + static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata) 848 + { 849 + #if defined(CONFIG_XEN_ACPI) 850 + int rc = -EINVAL; 851 + struct privcmd_pcidev_get_gsi kdata; 852 + 853 + if (copy_from_user(&kdata, udata, sizeof(kdata))) 854 + return -EFAULT; 855 + 856 + if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND)) 857 + rc = pcistub_get_gsi_from_sbdf(kdata.sbdf); 858 + 859 + if (rc < 0) 860 + return rc; 861 + 862 + kdata.gsi = rc; 863 + if (copy_to_user(udata, &kdata, sizeof(kdata))) 864 + return -EFAULT; 865 + 866 + return 0; 867 + #else 868 + return -EINVAL; 869 + #endif 870 + } 871 + 850 872 #ifdef CONFIG_XEN_PRIVCMD_EVENTFD 851 873 /* Irqfd support */ 852 874 static struct workqueue_struct *irqfd_cleanup_wq; ··· 1569 1541 1570 1542 case IOCTL_PRIVCMD_IOEVENTFD: 1571 1543 ret = privcmd_ioctl_ioeventfd(file, udata); 1544 + break; 1545 + 1546 + case IOCTL_PRIVCMD_PCIDEV_GET_GSI: 1547 + ret = privcmd_ioctl_pcidev_get_gsi(file, udata); 1572 1548 break; 1573 1549 1574 1550 default:
+1 -1
drivers/xen/xen-pciback/conf_space_capability.c
··· 122 122 if (err) 123 123 goto out; 124 124 125 - new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); 125 + new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); 126 126 127 127 new_value &= PM_OK_BITS; 128 128 if ((old_value & PM_OK_BITS) != new_value) {
+71 -7
drivers/xen/xen-pciback/pci_stub.c
··· 21 21 #include <xen/events.h> 22 22 #include <xen/pci.h> 23 23 #include <xen/xen.h> 24 + #ifdef CONFIG_XEN_ACPI 25 + #include <xen/acpi.h> 26 + #endif 24 27 #include <asm/xen/hypervisor.h> 25 28 #include <xen/interface/physdev.h> 26 29 #include "pciback.h" ··· 56 53 57 54 struct pci_dev *dev; 58 55 struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ 56 + #ifdef CONFIG_XEN_ACPI 57 + int gsi; 58 + #endif 59 59 }; 60 60 61 61 /* Access to pcistub_devices & seized_devices lists and the initialize_devices ··· 91 85 92 86 kref_init(&psdev->kref); 93 87 spin_lock_init(&psdev->lock); 88 + #ifdef CONFIG_XEN_ACPI 89 + psdev->gsi = -1; 90 + #endif 94 91 95 92 return psdev; 93 + } 94 + 95 + static int pcistub_reset_device_state(struct pci_dev *dev) 96 + { 97 + __pci_reset_function_locked(dev); 98 + 99 + if (!xen_pv_domain()) 100 + return xen_reset_device(dev); 101 + else 102 + return 0; 96 103 } 97 104 98 105 /* Don't call this directly as it's called by pcistub_device_put */ ··· 126 107 /* Call the reset function which does not take lock as this 127 108 * is called from "unbind" which takes a device_lock mutex. 128 109 */ 129 - __pci_reset_function_locked(dev); 110 + pcistub_reset_device_state(dev); 130 111 if (dev_data && 131 112 pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) 132 113 dev_info(&dev->dev, "Could not reload PCI state\n"); ··· 226 207 return pci_dev; 227 208 } 228 209 210 + #ifdef CONFIG_XEN_ACPI 211 + int pcistub_get_gsi_from_sbdf(unsigned int sbdf) 212 + { 213 + struct pcistub_device *psdev; 214 + int domain = (sbdf >> 16) & 0xffff; 215 + int bus = PCI_BUS_NUM(sbdf); 216 + int slot = PCI_SLOT(sbdf); 217 + int func = PCI_FUNC(sbdf); 218 + 219 + psdev = pcistub_device_find(domain, bus, slot, func); 220 + 221 + if (!psdev) 222 + return -ENODEV; 223 + 224 + return psdev->gsi; 225 + } 226 + EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf); 227 + #endif 228 + 229 229 struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, 230 230 int domain, int bus, 231 231 int slot, int func) ··· 322 284 * (so it's ready for the next domain) 323 285 */ 324 286 device_lock_assert(&dev->dev); 325 - __pci_reset_function_locked(dev); 287 + pcistub_reset_device_state(dev); 326 288 327 289 dev_data = pci_get_drvdata(dev); 328 290 ret = pci_load_saved_state(dev, dev_data->pci_saved_state); ··· 392 354 return found; 393 355 } 394 356 395 - static int pcistub_init_device(struct pci_dev *dev) 357 + static int pcistub_init_device(struct pcistub_device *psdev) 396 358 { 397 359 struct xen_pcibk_dev_data *dev_data; 360 + struct pci_dev *dev; 361 + #ifdef CONFIG_XEN_ACPI 362 + int gsi, trigger, polarity; 363 + #endif 398 364 int err = 0; 365 + 366 + if (!psdev) 367 + return -EINVAL; 368 + 369 + dev = psdev->dev; 399 370 400 371 dev_dbg(&dev->dev, "initializing...\n"); 401 372 ··· 467 420 dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); 468 421 else { 469 422 dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n"); 470 - __pci_reset_function_locked(dev); 423 + err = pcistub_reset_device_state(dev); 424 + if (err) 425 + goto config_release; 471 426 pci_restore_state(dev); 472 427 } 428 + 429 + #ifdef CONFIG_XEN_ACPI 430 + if (xen_initial_domain() && xen_pvh_domain()) { 431 + err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity); 432 + if (err) { 433 + dev_err(&dev->dev, "Fail to get gsi info!\n"); 434 + goto config_release; 435 + } 436 + err = xen_pvh_setup_gsi(gsi, trigger, polarity); 437 + if (err) 438 + goto config_release; 439 + psdev->gsi = gsi; 440 + } 441 + #endif 442 + 473 443 /* Now disable the device (this also ensures some private device 474 444 * data is setup before we export) 475 445 */ ··· 526 462 527 463 spin_unlock_irqrestore(&pcistub_devices_lock, flags); 528 464 529 - err = pcistub_init_device(psdev->dev); 465 + err = pcistub_init_device(psdev); 530 466 if (err) { 531 467 dev_err(&psdev->dev->dev, 532 468 "error %d initializing device\n", err); ··· 596 532 spin_unlock_irqrestore(&pcistub_devices_lock, flags); 597 533 598 534 /* don't want irqs disabled when calling pcistub_init_device */ 599 - err = pcistub_init_device(psdev->dev); 535 + err = pcistub_init_device(psdev); 600 536 601 537 spin_lock_irqsave(&pcistub_devices_lock, flags); 602 538 ··· 821 757 } 822 758 clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); 823 759 824 - res = (pci_ers_result_t)aer_op->err; 760 + res = (__force pci_ers_result_t)aer_op->err; 825 761 return res; 826 762 } 827 763
+1
include/linux/acpi.h
··· 363 363 364 364 struct pci_dev; 365 365 366 + struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); 366 367 int acpi_pci_irq_enable (struct pci_dev *dev); 367 368 void acpi_penalize_isa_irq(int irq, int active); 368 369 bool acpi_isa_irq_available(int irq);
+7
include/uapi/xen/privcmd.h
··· 126 126 __u8 pad[2]; 127 127 }; 128 128 129 + struct privcmd_pcidev_get_gsi { 130 + __u32 sbdf; 131 + __u32 gsi; 132 + }; 133 + 129 134 /* 130 135 * @cmd: IOCTL_PRIVCMD_HYPERCALL 131 136 * @arg: &privcmd_hypercall_t ··· 162 157 _IOW('P', 8, struct privcmd_irqfd) 163 158 #define IOCTL_PRIVCMD_IOEVENTFD \ 164 159 _IOW('P', 9, struct privcmd_ioeventfd) 160 + #define IOCTL_PRIVCMD_PCIDEV_GET_GSI \ 161 + _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi)) 165 162 166 163 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+27
include/xen/acpi.h
··· 67 67 acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; 68 68 } 69 69 } 70 + int xen_pvh_setup_gsi(int gsi, int trigger, int polarity); 71 + int xen_acpi_get_gsi_info(struct pci_dev *dev, 72 + int *gsi_out, 73 + int *trigger_out, 74 + int *polarity_out); 70 75 #else 71 76 static inline void xen_acpi_sleep_register(void) 72 77 { 78 + } 79 + 80 + static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) 81 + { 82 + return -1; 83 + } 84 + 85 + static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, 86 + int *gsi_out, 87 + int *trigger_out, 88 + int *polarity_out) 89 + { 90 + return -1; 91 + } 92 + #endif 93 + 94 + #ifdef CONFIG_XEN_PCI_STUB 95 + int pcistub_get_gsi_from_sbdf(unsigned int sbdf); 96 + #else 97 + static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) 98 + { 99 + return -1; 73 100 } 74 101 #endif 75 102
+88 -5
include/xen/interface/elfnote.h
··· 11 11 #define __XEN_PUBLIC_ELFNOTE_H__ 12 12 13 13 /* 14 - * The notes should live in a SHT_NOTE segment and have "Xen" in the 14 + * `incontents 200 elfnotes ELF notes 15 + * 16 + * The notes should live in a PT_NOTE segment and have "Xen" in the 15 17 * name field. 16 18 * 17 19 * Numeric types are either 4 or 8 bytes depending on the content of ··· 24 22 * 25 23 * String values (for non-legacy) are NULL terminated ASCII, also known 26 24 * as ASCIZ type. 25 + * 26 + * Xen only uses ELF Notes contained in x86 binaries. 27 27 */ 28 28 29 29 /* ··· 56 52 #define XEN_ELFNOTE_VIRT_BASE 3 57 53 58 54 /* 59 - * The offset of the ELF paddr field from the acutal required 55 + * The offset of the ELF paddr field from the actual required 60 56 * pseudo-physical address (numeric). 61 57 * 62 58 * This is used to maintain backwards compatibility with older kernels ··· 96 92 #define XEN_ELFNOTE_LOADER 8 97 93 98 94 /* 99 - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). 95 + * The kernel supports PAE (x86/32 only, string = "yes", "no" or 96 + * "bimodal"). 97 + * 98 + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting 99 + * may be given as "yes,bimodal" which will cause older Xen to treat 100 + * this kernel as PAE. 100 101 * 101 102 * LEGACY: PAE (n.b. The legacy interface included a provision to 102 103 * indicate 'extended-cr3' support allowing L3 page tables to be ··· 158 149 * The (non-default) location the initial phys-to-machine map should be 159 150 * placed at by the hypervisor (Dom0) or the tools (DomU). 160 151 * The kernel must be prepared for this mapping to be established using 161 - * large pages, despite such otherwise not being available to guests. 152 + * large pages, despite such otherwise not being available to guests. Note 153 + * that these large pages may be misaligned in PFN space (they'll obviously 154 + * be aligned in MFN and virtual address spaces). 162 155 * The kernel must also be able to handle the page table pages used for 163 156 * this mapping not being accessible through the initial mapping. 164 157 * (Only x86-64 supports this at present.) ··· 197 186 #define XEN_ELFNOTE_PHYS32_ENTRY 18 198 187 199 188 /* 189 + * Physical loading constraints for PVH kernels 190 + * 191 + * The presence of this note indicates the kernel supports relocating itself. 192 + * 193 + * The note may include up to three 32bit values to place constraints on the 194 + * guest physical loading addresses and alignment for a PVH kernel. Values 195 + * are read in the following order: 196 + * - a required start alignment (default 0x200000) 197 + * - a minimum address for the start of the image (default 0; see below) 198 + * - a maximum address for the last byte of the image (default 0xffffffff) 199 + * 200 + * When this note specifies an alignment value, it is used. Otherwise the 201 + * maximum p_align value from loadable ELF Program Headers is used, if it is 202 + * greater than or equal to 4k (0x1000). Otherwise, the default is used. 203 + */ 204 + #define XEN_ELFNOTE_PHYS32_RELOC 19 205 + 206 + /* 200 207 * The number of the highest elfnote defined. 201 208 */ 202 - #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY 209 + #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC 210 + 211 + /* 212 + * System information exported through crash notes. 213 + * 214 + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO 215 + * note in case of a system crash. This note will contain various 216 + * information about the system, see xen/include/xen/elfcore.h. 217 + */ 218 + #define XEN_ELFNOTE_CRASH_INFO 0x1000001 219 + 220 + /* 221 + * System registers exported through crash notes. 222 + * 223 + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS 224 + * note per cpu in case of a system crash. This note is architecture 225 + * specific and will contain registers not saved in the "CORE" note. 226 + * See xen/include/xen/elfcore.h for more information. 227 + */ 228 + #define XEN_ELFNOTE_CRASH_REGS 0x1000002 229 + 230 + 231 + /* 232 + * xen dump-core none note. 233 + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE 234 + * in its dump file to indicate that the file is xen dump-core 235 + * file. This note doesn't have any other information. 236 + * See tools/libxc/xc_core.h for more information. 237 + */ 238 + #define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 239 + 240 + /* 241 + * xen dump-core header note. 242 + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER 243 + * in its dump file. 244 + * See tools/libxc/xc_core.h for more information. 245 + */ 246 + #define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 247 + 248 + /* 249 + * xen dump-core xen version note. 250 + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION 251 + * in its dump file. It contains the xen version obtained via the 252 + * XENVER hypercall. 253 + * See tools/libxc/xc_core.h for more information. 254 + */ 255 + #define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 256 + 257 + /* 258 + * xen dump-core format version note. 259 + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 260 + * in its dump file. It contains a format version identifier. 261 + * See tools/libxc/xc_core.h for more information. 262 + */ 263 + #define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 203 264 204 265 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
+17
include/xen/interface/physdev.h
··· 256 256 */ 257 257 #define PHYSDEVOP_prepare_msix 30 258 258 #define PHYSDEVOP_release_msix 31 259 + /* 260 + * Notify the hypervisor that a PCI device has been reset, so that any 261 + * internally cached state is regenerated. Should be called after any 262 + * device reset performed by the hardware domain. 263 + */ 264 + #define PHYSDEVOP_pci_device_reset 32 265 + 259 266 struct physdev_pci_device { 260 267 /* IN */ 261 268 uint16_t seg; 262 269 uint8_t bus; 263 270 uint8_t devfn; 271 + }; 272 + 273 + struct pci_device_reset { 274 + struct physdev_pci_device dev; 275 + #define PCI_DEVICE_RESET_COLD 0x0 276 + #define PCI_DEVICE_RESET_WARM 0x1 277 + #define PCI_DEVICE_RESET_HOT 0x2 278 + #define PCI_DEVICE_RESET_FLR 0x3 279 + #define PCI_DEVICE_RESET_MASK 0x3 280 + uint32_t flags; 264 281 }; 265 282 266 283 #define PHYSDEVOP_DBGP_RESET_PREPARE 1
+6
include/xen/pci.h
··· 4 4 #define __XEN_PCI_H__ 5 5 6 6 #if defined(CONFIG_XEN_DOM0) 7 + int xen_reset_device(const struct pci_dev *dev); 7 8 int xen_find_device_domain_owner(struct pci_dev *dev); 8 9 int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); 9 10 int xen_unregister_device_domain_owner(struct pci_dev *dev); 10 11 #else 12 + static inline int xen_reset_device(const struct pci_dev *dev) 13 + { 14 + return -1; 15 + } 16 + 11 17 static inline int xen_find_device_domain_owner(struct pci_dev *dev) 12 18 { 13 19 return -1;