Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-5.11-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fixes from Juergen Gross:

- A series to fix a regression when running as a fully virtualized
guest on an old Xen hypervisor not supporting PV interrupt callbacks
for HVM guests.

- A patch to add support for querying Xen resource sizes from user
mode (setting them was already possible).

* tag 'for-linus-5.11-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
x86/xen: Fix xen_hvm_smp_init() when vector callback not available
x86/xen: Don't register Xen IPIs when they aren't going to be used
x86/xen: Add xen_no_vector_callback option to test PCI INTX delivery
xen: Set platform PCI device INTX affinity to CPU0
xen: Fix event channel callback via INTX/GSI
xen/privcmd: allow fetching resource sizes

+130 -55
+4
Documentation/admin-guide/kernel-parameters.txt
··· 5972 5972 This option is obsoleted by the "nopv" option, which 5973 5973 has equivalent effect for XEN platform. 5974 5974 5975 + xen_no_vector_callback 5976 + [KNL,X86,XEN] Disable the vector callback for Xen 5977 + event channel interrupts. 5978 + 5975 5979 xen_scrub_pages= [XEN] 5976 5980 Boolean option to control scrubbing pages before giving them back 5977 5981 to Xen, for use by other domains. Can be also changed at runtime
+1 -1
arch/arm/xen/enlighten.c
··· 371 371 } 372 372 gnttab_init(); 373 373 if (!xen_initial_domain()) 374 - xenbus_probe(NULL); 374 + xenbus_probe(); 375 375 376 376 /* 377 377 * Making sure board specific code will not set up ops for
+12 -3
arch/x86/xen/enlighten_hvm.c
··· 164 164 else 165 165 per_cpu(xen_vcpu_id, cpu) = cpu; 166 166 rc = xen_vcpu_setup(cpu); 167 - if (rc) 167 + if (rc || !xen_have_vector_callback) 168 168 return rc; 169 169 170 - if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) 170 + if (xen_feature(XENFEAT_hvm_safe_pvclock)) 171 171 xen_setup_timer(cpu); 172 172 173 173 rc = xen_smp_intr_init(cpu); ··· 188 188 return 0; 189 189 } 190 190 191 + static bool no_vector_callback __initdata; 192 + 191 193 static void __init xen_hvm_guest_init(void) 192 194 { 193 195 if (xen_pv_domain()) ··· 209 207 210 208 xen_panic_handler_init(); 211 209 212 - if (xen_feature(XENFEAT_hvm_callback_vector)) 210 + if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) 213 211 xen_have_vector_callback = 1; 214 212 215 213 xen_hvm_smp_init(); ··· 234 232 return 0; 235 233 } 236 234 early_param("xen_nopv", xen_parse_nopv); 235 + 236 + static __init int xen_parse_no_vector_callback(char *arg) 237 + { 238 + no_vector_callback = true; 239 + return 0; 240 + } 241 + early_param("xen_no_vector_callback", xen_parse_no_vector_callback); 237 242 238 243 bool __init xen_hvm_need_lapic(void) 239 244 {
+18 -11
arch/x86/xen/smp_hvm.c
··· 33 33 int cpu; 34 34 35 35 native_smp_prepare_cpus(max_cpus); 36 - WARN_ON(xen_smp_intr_init(0)); 37 36 38 - xen_init_lock_cpu(0); 37 + if (xen_have_vector_callback) { 38 + WARN_ON(xen_smp_intr_init(0)); 39 + xen_init_lock_cpu(0); 40 + } 39 41 40 42 for_each_possible_cpu(cpu) { 41 43 if (cpu == 0) ··· 52 50 static void xen_hvm_cpu_die(unsigned int cpu) 53 51 { 54 52 if (common_cpu_die(cpu) == 0) { 55 - xen_smp_intr_free(cpu); 56 - xen_uninit_lock_cpu(cpu); 57 - xen_teardown_timer(cpu); 53 + if (xen_have_vector_callback) { 54 + xen_smp_intr_free(cpu); 55 + xen_uninit_lock_cpu(cpu); 56 + xen_teardown_timer(cpu); 57 + } 58 58 } 59 59 } 60 60 #else ··· 68 64 69 65 void __init xen_hvm_smp_init(void) 70 66 { 71 - if (!xen_have_vector_callback) 72 - return; 73 - 67 + smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; 74 68 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; 75 - smp_ops.smp_send_reschedule = xen_smp_send_reschedule; 69 + smp_ops.smp_cpus_done = xen_smp_cpus_done; 76 70 smp_ops.cpu_die = xen_hvm_cpu_die; 71 + 72 + if (!xen_have_vector_callback) { 73 + nopvspin = true; 74 + return; 75 + } 76 + 77 + smp_ops.smp_send_reschedule = xen_smp_send_reschedule; 77 78 smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; 78 79 smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; 79 - smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; 80 - smp_ops.smp_cpus_done = xen_smp_cpus_done; 81 80 }
-10
drivers/xen/events/events_base.c
··· 2060 2060 .irq_ack = ack_dynirq, 2061 2061 }; 2062 2062 2063 - int xen_set_callback_via(uint64_t via) 2064 - { 2065 - struct xen_hvm_param a; 2066 - a.domid = DOMID_SELF; 2067 - a.index = HVM_PARAM_CALLBACK_IRQ; 2068 - a.value = via; 2069 - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); 2070 - } 2071 - EXPORT_SYMBOL_GPL(xen_set_callback_via); 2072 - 2073 2063 #ifdef CONFIG_XEN_PVHVM 2074 2064 /* Vector callbacks are better than PCI interrupts to receive event 2075 2065 * channel notifications because we can receive vector callbacks on any
+7 -1
drivers/xen/platform-pci.c
··· 132 132 dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); 133 133 goto out; 134 134 } 135 + /* 136 + * It doesn't strictly *have* to run on CPU0 but it sure 137 + * as hell better process the event channel ports delivered 138 + * to CPU0. 139 + */ 140 + irq_set_affinity(pdev->irq, cpumask_of(0)); 141 + 135 142 callback_via = get_callback_via(pdev); 136 143 ret = xen_set_callback_via(callback_via); 137 144 if (ret) { ··· 156 149 ret = gnttab_init(); 157 150 if (ret) 158 151 goto grant_out; 159 - xenbus_probe(NULL); 160 152 return 0; 161 153 grant_out: 162 154 gnttab_free_auto_xlat_frames();
+19 -6
drivers/xen/privcmd.c
··· 717 717 return 0; 718 718 } 719 719 720 - static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) 720 + static long privcmd_ioctl_mmap_resource(struct file *file, 721 + struct privcmd_mmap_resource __user *udata) 721 722 { 722 723 struct privcmd_data *data = file->private_data; 723 724 struct mm_struct *mm = current->mm; 724 725 struct vm_area_struct *vma; 725 726 struct privcmd_mmap_resource kdata; 726 727 xen_pfn_t *pfns = NULL; 727 - struct xen_mem_acquire_resource xdata; 728 + struct xen_mem_acquire_resource xdata = { }; 728 729 int rc; 729 730 730 731 if (copy_from_user(&kdata, udata, sizeof(kdata))) ··· 734 733 /* If restriction is in place, check the domid matches */ 735 734 if (data->domid != DOMID_INVALID && data->domid != kdata.dom) 736 735 return -EPERM; 736 + 737 + /* Both fields must be set or unset */ 738 + if (!!kdata.addr != !!kdata.num) 739 + return -EINVAL; 740 + 741 + xdata.domid = kdata.dom; 742 + xdata.type = kdata.type; 743 + xdata.id = kdata.id; 744 + 745 + if (!kdata.addr && !kdata.num) { 746 + /* Query the size of the resource. */ 747 + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); 748 + if (rc) 749 + return rc; 750 + return __put_user(xdata.nr_frames, &udata->num); 751 + } 737 752 738 753 mmap_write_lock(mm); 739 754 ··· 785 768 } else 786 769 vma->vm_private_data = PRIV_VMA_LOCKED; 787 770 788 - memset(&xdata, 0, sizeof(xdata)); 789 - xdata.domid = kdata.dom; 790 - xdata.type = kdata.type; 791 - xdata.id = kdata.id; 792 771 xdata.frame = kdata.idx; 793 772 xdata.nr_frames = kdata.num; 794 773 set_xen_guest_handle(xdata.frame_list, pfns);
+1
drivers/xen/xenbus/xenbus.h
··· 115 115 const char *type, 116 116 const char *nodename); 117 117 int xenbus_probe_devices(struct xen_bus_type *bus); 118 + void xenbus_probe(void); 118 119 119 120 void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); 120 121
-8
drivers/xen/xenbus/xenbus_comms.c
··· 57 57 static int xenbus_irq; 58 58 static struct task_struct *xenbus_task; 59 59 60 - static DECLARE_WORK(probe_work, xenbus_probe); 61 - 62 - 63 60 static irqreturn_t wake_waiting(int irq, void *unused) 64 61 { 65 - if (unlikely(xenstored_ready == 0)) { 66 - xenstored_ready = 1; 67 - schedule_work(&probe_work); 68 - } 69 - 70 62 wake_up(&xb_waitq); 71 63 return IRQ_HANDLED; 72 64 }
+67 -14
drivers/xen/xenbus/xenbus_probe.c
··· 683 683 } 684 684 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); 685 685 686 - void xenbus_probe(struct work_struct *unused) 686 + void xenbus_probe(void) 687 687 { 688 688 xenstored_ready = 1; 689 + 690 + /* 691 + * In the HVM case, xenbus_init() deferred its call to 692 + * xs_init() in case callbacks were not operational yet. 693 + * So do it now. 694 + */ 695 + if (xen_store_domain_type == XS_HVM) 696 + xs_init(); 689 697 690 698 /* Notify others that xenstore is up */ 691 699 blocking_notifier_call_chain(&xenstore_chain, 0, NULL); 692 700 } 693 - EXPORT_SYMBOL_GPL(xenbus_probe); 701 + 702 + /* 703 + * Returns true when XenStore init must be deferred in order to 704 + * allow the PCI platform device to be initialised, before we 705 + * can actually have event channel interrupts working. 706 + */ 707 + static bool xs_hvm_defer_init_for_callback(void) 708 + { 709 + #ifdef CONFIG_XEN_PVHVM 710 + return xen_store_domain_type == XS_HVM && 711 + !xen_have_vector_callback; 712 + #else 713 + return false; 714 + #endif 715 + } 694 716 695 717 static int __init xenbus_probe_initcall(void) 696 718 { 697 - if (!xen_domain()) 698 - return -ENODEV; 719 + /* 720 + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we 721 + * need to wait for the platform PCI device to come up. 
722 + */ 723 + if (xen_store_domain_type == XS_PV || 724 + (xen_store_domain_type == XS_HVM && 725 + !xs_hvm_defer_init_for_callback())) 726 + xenbus_probe(); 699 727 700 - if (xen_initial_domain() || xen_hvm_domain()) 701 - return 0; 702 - 703 - xenbus_probe(NULL); 704 728 return 0; 705 729 } 706 - 707 730 device_initcall(xenbus_probe_initcall); 731 + 732 + int xen_set_callback_via(uint64_t via) 733 + { 734 + struct xen_hvm_param a; 735 + int ret; 736 + 737 + a.domid = DOMID_SELF; 738 + a.index = HVM_PARAM_CALLBACK_IRQ; 739 + a.value = via; 740 + 741 + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); 742 + if (ret) 743 + return ret; 744 + 745 + /* 746 + * If xenbus_probe_initcall() deferred the xenbus_probe() 747 + * due to the callback not functioning yet, we can do it now. 748 + */ 749 + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) 750 + xenbus_probe(); 751 + 752 + return ret; 753 + } 754 + EXPORT_SYMBOL_GPL(xen_set_callback_via); 708 755 709 756 /* Set up event channel for xenstored which is run as a local process 710 757 * (this is normally used only in dom0) ··· 865 818 break; 866 819 } 867 820 868 - /* Initialize the interface to xenstore. */ 869 - err = xs_init(); 870 - if (err) { 871 - pr_warn("Error initializing xenstore comms: %i\n", err); 872 - goto out_error; 821 + /* 822 + * HVM domains may not have a functional callback yet. In that 823 + * case let xs_init() be called from xenbus_probe(), which will 824 + * get invoked at an appropriate time. 825 + */ 826 + if (xen_store_domain_type != XS_HVM) { 827 + err = xs_init(); 828 + if (err) { 829 + pr_warn("Error initializing xenstore comms: %i\n", err); 830 + goto out_error; 831 + } 873 832 } 874 833 875 834 if ((xen_store_domain_type != XS_LOCAL) &&
+1 -1
include/xen/xenbus.h
··· 192 192 193 193 struct work_struct; 194 194 195 - void xenbus_probe(struct work_struct *); 195 + void xenbus_probe(void); 196 196 197 197 #define XENBUS_IS_ERR_READ(str) ({ \ 198 198 if (!IS_ERR(str) && strlen(str) == 0) { \