Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into next

Pull Xen updates from David Vrabel:
"xen: features and fixes for 3.16-rc0
- support foreign mappings in PVH domains (needed when dom0 is PVH)

- fix mapping high MMIO regions in x86 PV guests (this is also the
first half of removing the _PAGE_IOMAP PTE flag).

- ARM suspend/resume support.

- ARM multicall support"

* tag 'stable/for-linus-3.16-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
x86/xen: map foreign pfns for autotranslated guests
xen-acpi-processor: Don't display errors when we get -ENOSYS
xen/pciback: Document the entry points for 'pcistub_put_pci_dev'
xen/pciback: Document when the 'unbind' and 'bind' functions are called.
xen-pciback: Document when we FLR a PCI device.
xen-pciback: First reset, then free.
xen-pciback: Cleanup up pcistub_put_pci_dev
x86/xen: do not use _PAGE_IOMAP in xen_remap_domain_mfn_range()
x86/xen: set regions above the end of RAM as 1:1
x86/xen: only warn once if bad MFNs are found during setup
x86/xen: compactly store large identity ranges in the p2m
x86/xen: fix set_phys_range_identity() if pfn_e > MAX_P2M_PFN
x86/xen: rename early_p2m_alloc() and early_p2m_alloc_middle()
xen/x86: set panic notifier priority to minimum
arm,arm64/xen: introduce HYPERVISOR_suspend()
xen: refactor suspend pre/post hooks
arm: xen: export HYPERVISOR_multicall to modules.
arm64: introduce virt_to_pfn
arm/xen: Remove definition of virt_to_pfn in asm/xen/page.h
arm: xen: implement multicall hypercall support.

+334 -128
+11 -5
arch/arm/include/asm/xen/hypercall.h
··· 34 34 #define _ASM_ARM_XEN_HYPERCALL_H 35 35 36 36 #include <xen/interface/xen.h> 37 + #include <xen/interface/sched.h> 37 38 38 39 long privcmd_call(unsigned call, unsigned long a1, 39 40 unsigned long a2, unsigned long a3, ··· 49 48 int HYPERVISOR_physdev_op(int cmd, void *arg); 50 49 int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); 51 50 int HYPERVISOR_tmem_op(void *arg); 51 + int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); 52 + 53 + static inline int 54 + HYPERVISOR_suspend(unsigned long start_info_mfn) 55 + { 56 + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; 57 + 58 + /* start_info_mfn is unused on ARM */ 59 + return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); 60 + } 52 61 53 62 static inline void 54 63 MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, ··· 74 63 BUG(); 75 64 } 76 65 77 - static inline int 78 - HYPERVISOR_multicall(void *call_list, int nr_calls) 79 - { 80 - BUG(); 81 - } 82 66 #endif /* _ASM_ARM_XEN_HYPERCALL_H */
+2
arch/arm/include/asm/xen/interface.h
··· 40 40 #define PRI_xen_pfn "llx" 41 41 typedef uint64_t xen_ulong_t; 42 42 #define PRI_xen_ulong "llx" 43 + typedef int64_t xen_long_t; 44 + #define PRI_xen_long "llx" 43 45 /* Guest handles for primitive C types. */ 44 46 __DEFINE_GUEST_HANDLE(uchar, unsigned char); 45 47 __DEFINE_GUEST_HANDLE(uint, unsigned int);
+9
arch/arm/xen/enlighten.c
··· 339 339 } 340 340 late_initcall(xen_pm_init); 341 341 342 + 343 + /* empty stubs */ 344 + void xen_arch_pre_suspend(void) { } 345 + void xen_arch_post_suspend(int suspend_cancelled) { } 346 + void xen_timer_resume(void) { } 347 + void xen_arch_resume(void) { } 348 + 349 + 342 350 /* In the hypervisor.S file. */ 343 351 EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); 344 352 EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); ··· 358 350 EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); 359 351 EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); 360 352 EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); 353 + EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); 361 354 EXPORT_SYMBOL_GPL(privcmd_call);
+1
arch/arm/xen/hypercall.S
··· 89 89 HYPERCALL2(physdev_op); 90 90 HYPERCALL3(vcpu_op); 91 91 HYPERCALL1(tmem_op); 92 + HYPERCALL2(multicall); 92 93 93 94 ENTRY(privcmd_call) 94 95 stmdb sp!, {r4}
+1
arch/arm64/xen/hypercall.S
··· 80 80 HYPERCALL2(physdev_op); 81 81 HYPERCALL3(vcpu_op); 82 82 HYPERCALL1(tmem_op); 83 + HYPERCALL2(multicall); 83 84 84 85 ENTRY(privcmd_call) 85 86 mov x16, x0
+1 -1
arch/x86/include/asm/xen/hypercall.h
··· 343 343 } 344 344 345 345 static inline int 346 - HYPERVISOR_multicall(void *call_list, int nr_calls) 346 + HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) 347 347 { 348 348 return _hypercall2(int, multicall, call_list, nr_calls); 349 349 }
+3
arch/x86/include/asm/xen/interface.h
··· 54 54 #define PRI_xen_pfn "lx" 55 55 typedef unsigned long xen_ulong_t; 56 56 #define PRI_xen_ulong "lx" 57 + typedef long xen_long_t; 58 + #define PRI_xen_long "lx" 59 + 57 60 /* Guest handles for primitive C types. */ 58 61 __DEFINE_GUEST_HANDLE(uchar, unsigned char); 59 62 __DEFINE_GUEST_HANDLE(uint, unsigned int);
+1
arch/x86/xen/enlighten.c
··· 1339 1339 1340 1340 static struct notifier_block xen_panic_block = { 1341 1341 .notifier_call= xen_panic_event, 1342 + .priority = INT_MIN 1342 1343 }; 1343 1344 1344 1345 int xen_panic_handler_init(void)
+119 -6
arch/x86/xen/mmu.c
··· 2510 2510 } 2511 2511 #endif 2512 2512 2513 + #ifdef CONFIG_XEN_PVH 2514 + /* 2515 + * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user 2516 + * space creating new guest on pvh dom0 and needing to map domU pages. 2517 + */ 2518 + static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, 2519 + unsigned int domid) 2520 + { 2521 + int rc, err = 0; 2522 + xen_pfn_t gpfn = lpfn; 2523 + xen_ulong_t idx = fgfn; 2524 + 2525 + struct xen_add_to_physmap_range xatp = { 2526 + .domid = DOMID_SELF, 2527 + .foreign_domid = domid, 2528 + .size = 1, 2529 + .space = XENMAPSPACE_gmfn_foreign, 2530 + }; 2531 + set_xen_guest_handle(xatp.idxs, &idx); 2532 + set_xen_guest_handle(xatp.gpfns, &gpfn); 2533 + set_xen_guest_handle(xatp.errs, &err); 2534 + 2535 + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); 2536 + if (rc < 0) 2537 + return rc; 2538 + return err; 2539 + } 2540 + 2541 + static int xlate_remove_from_p2m(unsigned long spfn, int count) 2542 + { 2543 + struct xen_remove_from_physmap xrp; 2544 + int i, rc; 2545 + 2546 + for (i = 0; i < count; i++) { 2547 + xrp.domid = DOMID_SELF; 2548 + xrp.gpfn = spfn+i; 2549 + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); 2550 + if (rc) 2551 + break; 2552 + } 2553 + return rc; 2554 + } 2555 + 2556 + struct xlate_remap_data { 2557 + unsigned long fgfn; /* foreign domain's gfn */ 2558 + pgprot_t prot; 2559 + domid_t domid; 2560 + int index; 2561 + struct page **pages; 2562 + }; 2563 + 2564 + static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, 2565 + void *data) 2566 + { 2567 + int rc; 2568 + struct xlate_remap_data *remap = data; 2569 + unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); 2570 + pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); 2571 + 2572 + rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); 2573 + if (rc) 2574 + return rc; 2575 + native_set_pte(ptep, pteval); 2576 + 2577 + return 0; 2578 + } 2579 + 2580 + static int 
xlate_remap_gfn_range(struct vm_area_struct *vma, 2581 + unsigned long addr, unsigned long mfn, 2582 + int nr, pgprot_t prot, unsigned domid, 2583 + struct page **pages) 2584 + { 2585 + int err; 2586 + struct xlate_remap_data pvhdata; 2587 + 2588 + BUG_ON(!pages); 2589 + 2590 + pvhdata.fgfn = mfn; 2591 + pvhdata.prot = prot; 2592 + pvhdata.domid = domid; 2593 + pvhdata.index = 0; 2594 + pvhdata.pages = pages; 2595 + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, 2596 + xlate_map_pte_fn, &pvhdata); 2597 + flush_tlb_all(); 2598 + return err; 2599 + } 2600 + #endif 2601 + 2513 2602 #define REMAP_BATCH_SIZE 16 2514 2603 2515 2604 struct remap_data { ··· 2611 2522 unsigned long addr, void *data) 2612 2523 { 2613 2524 struct remap_data *rmd = data; 2614 - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); 2525 + pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); 2615 2526 2616 2527 rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; 2617 2528 rmd->mmu_update->val = pte_val_ma(pte); ··· 2633 2544 unsigned long range; 2634 2545 int err = 0; 2635 2546 2636 - if (xen_feature(XENFEAT_auto_translated_physmap)) 2637 - return -EINVAL; 2638 - 2639 - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); 2640 - 2641 2547 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); 2548 + 2549 + if (xen_feature(XENFEAT_auto_translated_physmap)) { 2550 + #ifdef CONFIG_XEN_PVH 2551 + /* We need to update the local page tables and the xen HAP */ 2552 + return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, 2553 + domid, pages); 2554 + #else 2555 + return -EINVAL; 2556 + #endif 2557 + } 2642 2558 2643 2559 rmd.mfn = mfn; 2644 2560 rmd.prot = prot; ··· 2682 2588 if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) 2683 2589 return 0; 2684 2590 2591 + #ifdef CONFIG_XEN_PVH 2592 + while (numpgs--) { 2593 + /* 2594 + * The mmu has already cleaned up the process mmu 2595 + * resources at this point (lookup_address will return 2596 + * NULL). 
2597 + */ 2598 + unsigned long pfn = page_to_pfn(pages[numpgs]); 2599 + 2600 + xlate_remove_from_p2m(pfn, 1); 2601 + } 2602 + /* 2603 + * We don't need to flush tlbs because as part of 2604 + * xlate_remove_from_p2m, the hypervisor will do tlb flushes 2605 + * after removing the p2m entries from the EPT/NPT 2606 + */ 2607 + return 0; 2608 + #else 2685 2609 return -EINVAL; 2610 + #endif 2686 2611 } 2687 2612 EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+116 -58
arch/x86/xen/p2m.c
··· 36 36 * pfn_to_mfn(0xc0000)=0xc0000 37 37 * 38 38 * The benefit of this is, that we can assume for non-RAM regions (think 39 - * PCI BARs, or ACPI spaces), we can create mappings easily b/c we 39 + * PCI BARs, or ACPI spaces), we can create mappings easily because we 40 40 * get the PFN value to match the MFN. 41 41 * 42 42 * For this to work efficiently we have one new page p2m_identity and ··· 60 60 * There is also a digram of the P2M at the end that can help. 61 61 * Imagine your E820 looking as so: 62 62 * 63 - * 1GB 2GB 63 + * 1GB 2GB 4GB 64 64 * /-------------------+---------\/----\ /----------\ /---+-----\ 65 65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | 66 66 * \-------------------+---------/\----/ \----------/ \---+-----/ ··· 77 77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step 78 78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page 79 79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not 80 - * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn 81 - * to end pfn. We reserve_brk top leaf pages if they are missing (means they 82 - * point to p2m_mid_missing). 80 + * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as 81 + * required to split any existing p2m_mid_missing middle pages. 83 82 * 84 83 * With the E820 example above, 263424 is not 1GB aligned so we allocate a 85 84 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. ··· 87 88 * Next stage is to determine if we need to do a more granular boundary check 88 89 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. 89 90 * We check if the start pfn and end pfn violate that boundary check, and if 90 - * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer 91 + * so reserve_brk a (p2m[x][y]) leaf page. 
This way we have a much finer 91 92 * granularity of setting which PFNs are missing and which ones are identity. 92 93 * In our example 263424 and 512256 both fail the check so we reserve_brk two 93 94 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" ··· 101 102 * 102 103 * The next step is to walk from the start pfn to the end pfn setting 103 104 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. 104 - * If we find that the middle leaf is pointing to p2m_missing we can swap it 105 - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this 106 - * point we do not need to worry about boundary aligment (so no need to 105 + * If we find that the middle entry is pointing to p2m_missing we can swap it 106 + * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and 107 + * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). 108 + * At this point we do not need to worry about boundary aligment (so no need to 107 109 * reserve_brk a middle page, figure out which PFNs are "missing" and which 108 110 * ones are identity), as that has been done earlier. If we find that the 109 111 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference ··· 118 118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] 119 119 * contain the INVALID_P2M_ENTRY value and are considered "missing." 120 120 * 121 + * Finally, the region beyond the end of of the E820 (4 GB in this example) 122 + * is set to be identity (in case there are MMIO regions placed here). 123 + * 121 124 * This is what the p2m ends up looking (for the E820 above) with this 122 125 * fabulous drawing: 123 126 * ··· 132 129 * |-----| \ | [p2m_identity]+\\ | .... | 133 130 * | 2 |--\ \-------------------->| ... 
| \\ \----------------/ 134 131 * |-----| \ \---------------/ \\ 135 - * | 3 |\ \ \\ p2m_identity 136 - * |-----| \ \-------------------->/---------------\ /-----------------\ 137 - * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | 138 - * \-----/ / | [p2m_identity]+-->| ..., ~0 | 139 - * / /---------------\ | .... | \-----------------/ 140 - * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | 141 - * / | IDENTITY[@256]|<----/ \---------------/ 142 - * / | ~0, ~0, .... | 143 - * | \---------------/ 144 - * | 145 - * p2m_mid_missing p2m_missing 146 - * /-----------------\ /------------\ 147 - * | [p2m_missing] +---->| ~0, ~0, ~0 | 148 - * | [p2m_missing] +---->| ..., ~0 | 149 - * \-----------------/ \------------/ 132 + * | 3 |-\ \ \\ p2m_identity [1] 133 + * |-----| \ \-------------------->/---------------\ /-----------------\ 134 + * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | 135 + * \-----/ | | | [p2m_identity]+-->| ..., ~0 | 136 + * | | | .... | \-----------------/ 137 + * | | +-[x], ~0, ~0.. +\ 138 + * | | \---------------/ \ 139 + * | | \-> /---------------\ 140 + * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | 141 + * | /-----------------\ /------------\ | IDENTITY[@256]| 142 + * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | 143 + * | | [p2m_missing] +---->| ..., ~0 | \---------------/ 144 + * | | ... | \------------/ 145 + * | \-----------------/ 146 + * | 147 + * | p2m_mid_identity 148 + * | /-----------------\ 149 + * \-->| [p2m_identity] +---->[1] 150 + * | [p2m_identity] +---->[1] 151 + * | ... | 152 + * \-----------------/ 150 153 * 151 154 * where ~0 is INVALID_P2M_ENTRY. 
IDENTITY is (PFN | IDENTITY_BIT) 152 155 */ ··· 196 187 static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); 197 188 198 189 static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); 190 + static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); 191 + static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); 199 192 200 193 RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 201 194 RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 202 195 203 196 /* We might hit two boundary violations at the start and end, at max each 204 197 * boundary violation will require three middle nodes. */ 205 - RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); 198 + RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3); 206 199 207 200 /* When we populate back during bootup, the amount of pages can vary. The 208 201 * max we have is seen is 395979, but that does not mean it can't be more. 
··· 253 242 top[i] = p2m_mid_missing_mfn; 254 243 } 255 244 256 - static void p2m_mid_init(unsigned long **mid) 245 + static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) 257 246 { 258 247 unsigned i; 259 248 260 249 for (i = 0; i < P2M_MID_PER_PAGE; i++) 261 - mid[i] = p2m_missing; 250 + mid[i] = leaf; 262 251 } 263 252 264 - static void p2m_mid_mfn_init(unsigned long *mid) 253 + static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) 265 254 { 266 255 unsigned i; 267 256 268 257 for (i = 0; i < P2M_MID_PER_PAGE; i++) 269 - mid[i] = virt_to_mfn(p2m_missing); 258 + mid[i] = virt_to_mfn(leaf); 270 259 } 271 260 272 261 static void p2m_init(unsigned long *p2m) ··· 297 286 /* Pre-initialize p2m_top_mfn to be completely missing */ 298 287 if (p2m_top_mfn == NULL) { 299 288 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 300 - p2m_mid_mfn_init(p2m_mid_missing_mfn); 289 + p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 290 + p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 291 + p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); 301 292 302 293 p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 303 294 p2m_top_mfn_p_init(p2m_top_mfn_p); ··· 308 295 p2m_top_mfn_init(p2m_top_mfn); 309 296 } else { 310 297 /* Reinitialise, mfn's all change after migration */ 311 - p2m_mid_mfn_init(p2m_mid_missing_mfn); 298 + p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 299 + p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); 312 300 } 313 301 314 302 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { ··· 341 327 * it too late. 
342 328 */ 343 329 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 344 - p2m_mid_mfn_init(mid_mfn_p); 330 + p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 345 331 346 332 p2m_top_mfn_p[topidx] = mid_mfn_p; 347 333 } ··· 379 365 380 366 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); 381 367 p2m_init(p2m_missing); 368 + p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); 369 + p2m_init(p2m_identity); 382 370 383 371 p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); 384 - p2m_mid_init(p2m_mid_missing); 372 + p2m_mid_init(p2m_mid_missing, p2m_missing); 373 + p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); 374 + p2m_mid_init(p2m_mid_identity, p2m_identity); 385 375 386 376 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); 387 377 p2m_top_init(p2m_top); 388 - 389 - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); 390 - p2m_init(p2m_identity); 391 378 392 379 /* 393 380 * The domain builder gives us a pre-constructed p2m array in ··· 401 386 402 387 if (p2m_top[topidx] == p2m_mid_missing) { 403 388 unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 404 - p2m_mid_init(mid); 389 + p2m_mid_init(mid, p2m_missing); 405 390 406 391 p2m_top[topidx] = mid; 407 392 } ··· 507 492 unsigned topidx, mididx, idx; 508 493 509 494 if (unlikely(pfn >= MAX_P2M_PFN)) 510 - return INVALID_P2M_ENTRY; 495 + return IDENTITY_FRAME(pfn); 511 496 512 497 topidx = p2m_top_index(pfn); 513 498 mididx = p2m_mid_index(pfn); ··· 560 545 if (!mid) 561 546 return false; 562 547 563 - p2m_mid_init(mid); 548 + p2m_mid_init(mid, p2m_missing); 564 549 565 550 if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) 566 551 free_p2m_page(mid); ··· 580 565 if (!mid_mfn) 581 566 return false; 582 567 583 - p2m_mid_mfn_init(mid_mfn); 568 + p2m_mid_mfn_init(mid_mfn, p2m_missing); 584 569 585 570 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 586 571 mid_mfn_mfn = virt_to_mfn(mid_mfn); ··· 611 596 return true; 612 597 } 613 598 614 - static bool __init early_alloc_p2m_middle(unsigned long pfn, bool 
check_boundary) 599 + static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) 615 600 { 616 601 unsigned topidx, mididx, idx; 617 602 unsigned long *p2m; ··· 653 638 return true; 654 639 } 655 640 656 - static bool __init early_alloc_p2m(unsigned long pfn) 641 + static bool __init early_alloc_p2m_middle(unsigned long pfn) 657 642 { 658 643 unsigned topidx = p2m_top_index(pfn); 659 644 unsigned long *mid_mfn_p; ··· 664 649 if (mid == p2m_mid_missing) { 665 650 mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 666 651 667 - p2m_mid_init(mid); 652 + p2m_mid_init(mid, p2m_missing); 668 653 669 654 p2m_top[topidx] = mid; 670 655 ··· 673 658 /* And the save/restore P2M tables.. */ 674 659 if (mid_mfn_p == p2m_mid_missing_mfn) { 675 660 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 676 - p2m_mid_mfn_init(mid_mfn_p); 661 + p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 677 662 678 663 p2m_top_mfn_p[topidx] = mid_mfn_p; 679 664 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); 680 665 /* Note: we don't set mid_mfn_p[midix] here, 681 - * look in early_alloc_p2m_middle */ 666 + * look in early_alloc_p2m() */ 682 667 } 683 668 return true; 684 669 } ··· 754 739 755 740 /* This shouldn't happen */ 756 741 if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) 757 - early_alloc_p2m(set_pfn); 742 + early_alloc_p2m_middle(set_pfn); 758 743 759 744 if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) 760 745 return false; ··· 769 754 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) 770 755 { 771 756 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 772 - if (!early_alloc_p2m(pfn)) 757 + if (!early_alloc_p2m_middle(pfn)) 773 758 return false; 774 759 775 760 if (early_can_reuse_p2m_middle(pfn, mfn)) 776 761 return __set_phys_to_machine(pfn, mfn); 777 762 778 - if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) 763 + if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) 779 764 return false; 780 765 781 766 if 
(!__set_phys_to_machine(pfn, mfn)) ··· 784 769 785 770 return true; 786 771 } 772 + 773 + static void __init early_split_p2m(unsigned long pfn) 774 + { 775 + unsigned long mididx, idx; 776 + 777 + mididx = p2m_mid_index(pfn); 778 + idx = p2m_index(pfn); 779 + 780 + /* 781 + * Allocate new middle and leaf pages if this pfn lies in the 782 + * middle of one. 783 + */ 784 + if (mididx || idx) 785 + early_alloc_p2m_middle(pfn); 786 + if (idx) 787 + early_alloc_p2m(pfn, false); 788 + } 789 + 787 790 unsigned long __init set_phys_range_identity(unsigned long pfn_s, 788 791 unsigned long pfn_e) 789 792 { 790 793 unsigned long pfn; 791 794 792 - if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) 795 + if (unlikely(pfn_s >= MAX_P2M_PFN)) 793 796 return 0; 794 797 795 798 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) ··· 816 783 if (pfn_s > pfn_e) 817 784 return 0; 818 785 819 - for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); 820 - pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); 821 - pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) 822 - { 823 - WARN_ON(!early_alloc_p2m(pfn)); 824 - } 786 + if (pfn_e > MAX_P2M_PFN) 787 + pfn_e = MAX_P2M_PFN; 825 788 826 - early_alloc_p2m_middle(pfn_s, true); 827 - early_alloc_p2m_middle(pfn_e, true); 789 + early_split_p2m(pfn_s); 790 + early_split_p2m(pfn_e); 828 791 829 - for (pfn = pfn_s; pfn < pfn_e; pfn++) 792 + for (pfn = pfn_s; pfn < pfn_e;) { 793 + unsigned topidx = p2m_top_index(pfn); 794 + unsigned mididx = p2m_mid_index(pfn); 795 + 830 796 if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) 831 797 break; 798 + pfn++; 799 + 800 + /* 801 + * If the PFN was set to a middle or leaf identity 802 + * page the remainder must also be identity, so skip 803 + * ahead to the next middle or leaf entry. 
804 + */ 805 + if (p2m_top[topidx] == p2m_mid_identity) 806 + pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); 807 + else if (p2m_top[topidx][mididx] == p2m_identity) 808 + pfn = ALIGN(pfn, P2M_PER_PAGE); 809 + } 832 810 833 811 if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), 834 812 "Identity mapping failed. We are %ld short of 1-1 mappings!\n", ··· 869 825 870 826 /* For sparse holes were the p2m leaf has real PFN along with 871 827 * PCI holes, stick in the PFN as the MFN value. 828 + * 829 + * set_phys_range_identity() will have allocated new middle 830 + * and leaf pages as required so an existing p2m_mid_missing 831 + * or p2m_missing mean that whole range will be identity so 832 + * these can be switched to p2m_mid_identity or p2m_identity. 872 833 */ 873 834 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { 835 + if (p2m_top[topidx] == p2m_mid_identity) 836 + return true; 837 + 838 + if (p2m_top[topidx] == p2m_mid_missing) { 839 + WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, 840 + p2m_mid_identity) != p2m_mid_missing); 841 + return true; 842 + } 843 + 874 844 if (p2m_top[topidx][mididx] == p2m_identity) 875 845 return true; 876 846
+12 -3
arch/x86/xen/setup.c
··· 89 89 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { 90 90 unsigned long mfn = pfn_to_mfn(pfn); 91 91 92 - if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) 92 + if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) 93 93 continue; 94 - WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", 95 - pfn, mfn); 94 + WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", 95 + pfn, mfn); 96 96 97 97 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 98 98 } ··· 467 467 if (map[i].size == 0) 468 468 i++; 469 469 } 470 + 471 + /* 472 + * Set the rest as identity mapped, in case PCI BARs are 473 + * located here. 474 + * 475 + * PFNs above MAX_P2M_PFN are considered identity mapped as 476 + * well. 477 + */ 478 + set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul); 470 479 471 480 /* 472 481 * In domU, the ISA region is normal, usable memory, but we
+20 -3
arch/x86/xen/suspend.c
··· 12 12 #include "xen-ops.h" 13 13 #include "mmu.h" 14 14 15 - void xen_arch_pre_suspend(void) 15 + static void xen_pv_pre_suspend(void) 16 16 { 17 + xen_mm_pin_all(); 18 + 17 19 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); 18 20 xen_start_info->console.domU.mfn = 19 21 mfn_to_pfn(xen_start_info->console.domU.mfn); ··· 28 26 BUG(); 29 27 } 30 28 31 - void xen_arch_hvm_post_suspend(int suspend_cancelled) 29 + static void xen_hvm_post_suspend(int suspend_cancelled) 32 30 { 33 31 #ifdef CONFIG_XEN_PVHVM 34 32 int cpu; ··· 43 41 #endif 44 42 } 45 43 46 - void xen_arch_post_suspend(int suspend_cancelled) 44 + static void xen_pv_post_suspend(int suspend_cancelled) 47 45 { 48 46 xen_build_mfn_list_list(); 49 47 ··· 62 60 xen_vcpu_restore(); 63 61 } 64 62 63 + xen_mm_unpin_all(); 64 + } 65 + 66 + void xen_arch_pre_suspend(void) 67 + { 68 + if (xen_pv_domain()) 69 + xen_pv_pre_suspend(); 70 + } 71 + 72 + void xen_arch_post_suspend(int cancelled) 73 + { 74 + if (xen_pv_domain()) 75 + xen_pv_post_suspend(cancelled); 76 + else 77 + xen_hvm_post_suspend(cancelled); 65 78 } 66 79 67 80 static void xen_vcpu_notify_restore(void *data)
+2
arch/x86/xen/xen-ops.h
··· 31 31 void xen_reserve_top(void); 32 32 extern unsigned long xen_max_p2m_pfn; 33 33 34 + void xen_mm_pin_all(void); 35 + void xen_mm_unpin_all(void); 34 36 void xen_set_pat(u64); 35 37 36 38 char * __init xen_memory_setup(void);
+7 -38
drivers/xen/manage.c
··· 41 41 42 42 struct suspend_info { 43 43 int cancelled; 44 - unsigned long arg; /* extra hypercall argument */ 45 - void (*pre)(void); 46 - void (*post)(int cancelled); 47 44 }; 48 45 49 46 static RAW_NOTIFIER_HEAD(xen_resume_notifier); ··· 58 61 EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister); 59 62 60 63 #ifdef CONFIG_HIBERNATE_CALLBACKS 61 - static void xen_hvm_post_suspend(int cancelled) 62 - { 63 - xen_arch_hvm_post_suspend(cancelled); 64 - gnttab_resume(); 65 - } 66 - 67 - static void xen_pre_suspend(void) 68 - { 69 - xen_mm_pin_all(); 70 - gnttab_suspend(); 71 - xen_arch_pre_suspend(); 72 - } 73 - 74 - static void xen_post_suspend(int cancelled) 75 - { 76 - xen_arch_post_suspend(cancelled); 77 - gnttab_resume(); 78 - xen_mm_unpin_all(); 79 - } 80 - 81 64 static int xen_suspend(void *data) 82 65 { 83 66 struct suspend_info *si = data; ··· 71 94 return err; 72 95 } 73 96 74 - if (si->pre) 75 - si->pre(); 97 + gnttab_suspend(); 98 + xen_arch_pre_suspend(); 76 99 77 100 /* 78 101 * This hypercall returns 1 if suspend was cancelled 79 102 * or the domain was merely checkpointed, and 0 if it 80 103 * is resuming in a new domain. 81 104 */ 82 - si->cancelled = HYPERVISOR_suspend(si->arg); 105 + si->cancelled = HYPERVISOR_suspend(xen_pv_domain() 106 + ? virt_to_mfn(xen_start_info) 107 + : 0); 83 108 84 - if (si->post) 85 - si->post(si->cancelled); 109 + xen_arch_post_suspend(si->cancelled); 110 + gnttab_resume(); 86 111 87 112 if (!si->cancelled) { 88 113 xen_irq_resume(); ··· 132 153 } 133 154 134 155 si.cancelled = 1; 135 - 136 - if (xen_hvm_domain()) { 137 - si.arg = 0UL; 138 - si.pre = NULL; 139 - si.post = &xen_hvm_post_suspend; 140 - } else { 141 - si.arg = virt_to_mfn(xen_start_info); 142 - si.pre = &xen_pre_suspend; 143 - si.post = &xen_post_suspend; 144 - } 145 156 146 157 err = stop_machine(xen_suspend, &si, cpumask_of(0)); 147 158
+2 -2
drivers/xen/xen-acpi-processor.c
··· 127 127 pr_debug(" C%d: %s %d uS\n", 128 128 cx->type, cx->desc, (u32)cx->latency); 129 129 } 130 - } else if (ret != -EINVAL) 130 + } else if ((ret != -EINVAL) && (ret != -ENOSYS)) 131 131 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI 132 132 * table is referencing a non-existing CPU - which can happen 133 133 * with broken ACPI tables. */ ··· 259 259 (u32) perf->states[i].power, 260 260 (u32) perf->states[i].transition_latency); 261 261 } 262 - } else if (ret != -EINVAL) 262 + } else if ((ret != -EINVAL) && (ret != -ENOSYS)) 263 263 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI 264 264 * table is referencing a non-existing CPU - which can happen 265 265 * with broken ACPI tables. */
+20 -5
drivers/xen/xen-pciback/pci_stub.c
··· 242 242 return found_dev; 243 243 } 244 244 245 + /* 246 + * Called when: 247 + * - XenBus state has been reconfigure (pci unplug). See xen_pcibk_remove_device 248 + * - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove 249 + * - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove 250 + * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove 251 + * 252 + * As such we have to be careful. 253 + */ 245 254 void pcistub_put_pci_dev(struct pci_dev *dev) 246 255 { 247 256 struct pcistub_device *psdev, *found_psdev = NULL; ··· 281 272 * and want to inhibit the user from fiddling with 'reset' 282 273 */ 283 274 pci_reset_function(dev); 284 - pci_restore_state(psdev->dev); 275 + pci_restore_state(dev); 285 276 286 277 /* This disables the device. */ 287 - xen_pcibk_reset_device(found_psdev->dev); 278 + xen_pcibk_reset_device(dev); 288 279 289 280 /* And cleanup up our emulated fields. */ 290 - xen_pcibk_config_free_dyn_fields(found_psdev->dev); 291 - xen_pcibk_config_reset_dev(found_psdev->dev); 281 + xen_pcibk_config_reset_dev(dev); 282 + xen_pcibk_config_free_dyn_fields(dev); 292 283 293 - xen_unregister_device_domain_owner(found_psdev->dev); 284 + xen_unregister_device_domain_owner(dev); 294 285 295 286 spin_lock_irqsave(&found_psdev->lock, flags); 296 287 found_psdev->pdev = NULL; ··· 502 493 return err; 503 494 } 504 495 496 + /* Called when 'bind'. This means we must _NOT_ call pci_reset_function or 497 + * other functions that take the sysfs lock. */ 505 498 static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) 506 499 { 507 500 int err = 0; ··· 531 520 return err; 532 521 } 533 522 523 + /* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or 524 + * other functions that take the sysfs lock. 
*/ 534 525 static void pcistub_remove(struct pci_dev *dev) 535 526 { 536 527 struct pcistub_device *psdev, *found_psdev = NULL; ··· 564 551 pr_warn("****** shutdown driver domain before binding device\n"); 565 552 pr_warn("****** to other drivers or domains\n"); 566 553 554 + /* N.B. This ends up calling pcistub_put_pci_dev which ends up 555 + * doing the FLR. */ 567 556 xen_pcibk_release_pci_dev(found_psdev->pdev, 568 557 found_psdev->dev); 569 558 }
+4
drivers/xen/xen-pciback/xenbus.c
··· 93 93 94 94 xen_pcibk_disconnect(pdev); 95 95 96 + /* N.B. This calls pcistub_put_pci_dev which does the FLR on all 97 + * of the PCIe devices. */ 96 98 xen_pcibk_release_devices(pdev); 97 99 98 100 dev_set_drvdata(&pdev->xdev->dev, NULL); ··· 288 286 dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); 289 287 xen_unregister_device_domain_owner(dev); 290 288 289 + /* N.B. This ends up calling pcistub_put_pci_dev which ends up 290 + * doing the FLR. */ 291 291 xen_pcibk_release_pci_dev(pdev, dev); 292 292 293 293 out:
+3 -3
include/xen/interface/xen.h
··· 275 275 * NB. The fields are natural register size for this architecture. 276 276 */ 277 277 struct multicall_entry { 278 - unsigned long op; 279 - long result; 280 - unsigned long args[6]; 278 + xen_ulong_t op; 279 + xen_long_t result; 280 + xen_ulong_t args[6]; 281 281 }; 282 282 DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); 283 283
-4
include/xen/xen-ops.h
··· 9 9 10 10 void xen_arch_pre_suspend(void); 11 11 void xen_arch_post_suspend(int suspend_cancelled); 12 - void xen_arch_hvm_post_suspend(int suspend_cancelled); 13 - 14 - void xen_mm_pin_all(void); 15 - void xen_mm_unpin_all(void); 16 12 17 13 void xen_timer_resume(void); 18 14 void xen_arch_resume(void);