Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from David Vrabel:

- Improve balloon driver memory hotplug placement.

- Use unpopulated hotplugged memory for foreign pages (if
supported/enabled).

- Support 64 KiB guest pages on arm64.

- CPU hotplug support on arm/arm64.

* tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (44 commits)
xen: fix the check of e_pfn in xen_find_pfn_range
x86/xen: add reschedule point when mapping foreign GFNs
xen/arm: don't try to re-register vcpu_info on cpu_hotplug.
xen, cpu_hotplug: call device_offline instead of cpu_down
xen/arm: Enable cpu_hotplug.c
xenbus: Support multiple grants ring with 64KB
xen/grant-table: Add an helper to iterate over a specific number of grants
xen/xenbus: Rename *RING_PAGE* to *RING_GRANT*
xen/arm: correct comment in enlighten.c
xen/gntdev: use types from linux/types.h in userspace headers
xen/gntalloc: use types from linux/types.h in userspace headers
xen/balloon: Use the correct sizeof when declaring frame_list
xen/swiotlb: Add support for 64KB page granularity
xen/swiotlb: Pass addresses rather than frame numbers to xen_arch_need_swiotlb
arm/xen: Add support for 64KB page granularity
xen/privcmd: Add support for Linux 64KB page granularity
net/xen-netback: Make it running on 64KB page granularity
net/xen-netfront: Make it running on 64KB page granularity
block/xen-blkback: Make it running on 64KB page granularity
block/xen-blkfront: Make it running on 64KB page granularity
...

+1370 -652
+10
arch/arm/include/asm/xen/hypervisor.h
··· 26 26 static inline void xen_early_init(void) { return; } 27 27 #endif 28 28 29 + #ifdef CONFIG_HOTPLUG_CPU 30 + static inline void xen_arch_register_cpu(int num) 31 + { 32 + } 33 + 34 + static inline void xen_arch_unregister_cpu(int num) 35 + { 36 + } 37 + #endif 38 + 29 39 #endif /* _ASM_ARM_XEN_HYPERVISOR_H */
+17 -9
arch/arm/include/asm/xen/page-coherent.h
··· 35 35 dma_addr_t dev_addr, unsigned long offset, size_t size, 36 36 enum dma_data_direction dir, struct dma_attrs *attrs) 37 37 { 38 - bool local = PFN_DOWN(dev_addr) == page_to_pfn(page); 39 - /* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise 40 - * is a foreign page grant-mapped in dom0. If the page is local we 41 - * can safely call the native dma_ops function, otherwise we call 42 - * the xen specific function. */ 38 + bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page); 39 + /* 40 + * Dom0 is mapped 1:1, while the Linux page can be spanned across 41 + * multiple Xen pages, it's not possible to have a mix of local and 42 + * foreign Xen pages. So if the first xen_pfn == mfn the page is local 43 + * otherwise it's a foreign page grant-mapped in dom0. If the page is 44 + * local we can safely call the native dma_ops function, otherwise we 45 + * call the xen specific function. 46 + */ 43 47 if (local) 44 48 __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); 45 49 else ··· 55 51 struct dma_attrs *attrs) 56 52 { 57 53 unsigned long pfn = PFN_DOWN(handle); 58 - /* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will 59 - * always return false. If the page is local we can safely call the 60 - * native dma_ops function, otherwise we call the xen specific 61 - * function. */ 54 + /* 55 + * Dom0 is mapped 1:1, while the Linux page can be spanned across 56 + * multiple Xen pages, it's not possible to have a mix of local and 57 + * foreign Xen pages. Dom0 is mapped 1:1, so calling pfn_valid on a 58 + * foreign mfn will always return false. If the page is local we can 59 + * safely call the native dma_ops function, otherwise we call the xen 60 + * specific function. 61 + */ 62 62 if (pfn_valid(pfn)) { 63 63 if (__generic_dma_ops(hwdev)->unmap_page) 64 64 __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
+15 -7
arch/arm/include/asm/xen/page.h
··· 13 13 14 14 #define phys_to_machine_mapping_valid(pfn) (1) 15 15 16 - #define pte_mfn pte_pfn 17 - #define mfn_pte pfn_pte 18 - 19 16 /* Xen machine address */ 20 17 typedef struct xmaddr { 21 18 phys_addr_t maddr; ··· 27 30 #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) 28 31 29 32 #define INVALID_P2M_ENTRY (~0UL) 33 + 34 + /* 35 + * The pseudo-physical frame (pfn) used in all the helpers is always based 36 + * on Xen page granularity (i.e 4KB). 37 + * 38 + * A Linux page may be split across multiple non-contiguous Xen page so we 39 + * have to keep track with frame based on 4KB page granularity. 40 + * 41 + * PV drivers should never make a direct usage of those helpers (particularly 42 + * pfn_to_gfn and gfn_to_pfn). 43 + */ 30 44 31 45 unsigned long __pfn_to_mfn(unsigned long pfn); 32 46 extern struct rb_root phys_to_mach; ··· 75 67 #define bfn_to_local_pfn(bfn) bfn_to_pfn(bfn) 76 68 77 69 /* VIRT <-> GUEST conversion */ 78 - #define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v))) 79 - #define gfn_to_virt(m) (__va(gfn_to_pfn(m) << PAGE_SHIFT)) 70 + #define virt_to_gfn(v) (pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT)) 71 + #define gfn_to_virt(m) (__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT)) 80 72 81 73 /* Only used in PV code. But ARM guests are always HVM. */ 82 74 static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr) ··· 115 107 #define xen_unmap(cookie) iounmap((cookie)) 116 108 117 109 bool xen_arch_need_swiotlb(struct device *dev, 118 - unsigned long pfn, 119 - unsigned long bfn); 110 + phys_addr_t phys, 111 + dma_addr_t dev_addr); 120 112 unsigned long xen_get_swiotlb_free_pages(unsigned int order); 121 113 122 114 #endif /* _ASM_ARM_XEN_PAGE_H */
+16 -4
arch/arm/xen/enlighten.c
··· 86 86 int err; 87 87 int cpu = get_cpu(); 88 88 89 + /* 90 + * VCPUOP_register_vcpu_info cannot be called twice for the same 91 + * vcpu, so if vcpu_info is already registered, just get out. This 92 + * can happen with cpu-hotplug. 93 + */ 94 + if (per_cpu(xen_vcpu, cpu) != NULL) 95 + goto after_register_vcpu_info; 96 + 89 97 pr_info("Xen: initializing cpu%d\n", cpu); 90 98 vcpup = per_cpu_ptr(xen_vcpu_info, cpu); 91 99 92 - info.mfn = __pa(vcpup) >> PAGE_SHIFT; 93 - info.offset = offset_in_page(vcpup); 100 + info.mfn = virt_to_gfn(vcpup); 101 + info.offset = xen_offset_in_page(vcpup); 94 102 95 103 err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); 96 104 BUG_ON(err); 97 105 per_cpu(xen_vcpu, cpu) = vcpup; 98 106 107 + after_register_vcpu_info: 99 108 enable_percpu_irq(xen_events_irq, 0); 100 109 put_cpu(); 101 110 } ··· 132 123 switch (action) { 133 124 case CPU_STARTING: 134 125 xen_percpu_init(); 126 + break; 127 + case CPU_DYING: 128 + disable_percpu_irq(xen_events_irq); 135 129 break; 136 130 default: 137 131 break; ··· 225 213 xatp.domid = DOMID_SELF; 226 214 xatp.idx = 0; 227 215 xatp.space = XENMAPSPACE_shared_info; 228 - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; 216 + xatp.gpfn = virt_to_gfn(shared_info_page); 229 217 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 230 218 BUG(); 231 219 ··· 296 284 void xen_arch_suspend(void) { } 297 285 298 286 299 - /* In the hypervisor.S file. */ 287 + /* In the hypercall.S file. */ 300 288 EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); 301 289 EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); 302 290 EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
+29 -10
arch/arm/xen/mm.c
··· 48 48 size_t size, enum dma_data_direction dir, enum dma_cache_op op) 49 49 { 50 50 struct gnttab_cache_flush cflush; 51 - unsigned long pfn; 51 + unsigned long xen_pfn; 52 52 size_t left = size; 53 53 54 - pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE; 55 - offset %= PAGE_SIZE; 54 + xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; 55 + offset %= XEN_PAGE_SIZE; 56 56 57 57 do { 58 58 size_t len = left; 59 59 60 60 /* buffers in highmem or foreign pages cannot cross page 61 61 * boundaries */ 62 - if (len + offset > PAGE_SIZE) 63 - len = PAGE_SIZE - offset; 62 + if (len + offset > XEN_PAGE_SIZE) 63 + len = XEN_PAGE_SIZE - offset; 64 64 65 65 cflush.op = 0; 66 - cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; 66 + cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT; 67 67 cflush.offset = offset; 68 68 cflush.length = len; 69 69 ··· 79 79 HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); 80 80 81 81 offset = 0; 82 - pfn++; 82 + xen_pfn++; 83 83 left -= len; 84 84 } while (left); 85 85 } ··· 138 138 } 139 139 140 140 bool xen_arch_need_swiotlb(struct device *dev, 141 - unsigned long pfn, 142 - unsigned long bfn) 141 + phys_addr_t phys, 142 + dma_addr_t dev_addr) 143 143 { 144 - return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev)); 144 + unsigned int xen_pfn = XEN_PFN_DOWN(phys); 145 + unsigned int bfn = XEN_PFN_DOWN(dev_addr); 146 + 147 + /* 148 + * The swiotlb buffer should be used if 149 + * - Xen doesn't have the cache flush hypercall 150 + * - The Linux page refers to foreign memory 151 + * - The device doesn't support coherent DMA request 152 + * 153 + * The Linux page may be spanned across multiple Xen pages, although 154 + * it's not possible to have a mix of local and foreign Xen pages. 155 + * Furthermore, range_straddles_page_boundary is already checking 156 + * if buffer is physically contiguous in the host RAM. 
157 + * 158 + * Therefore we only need to check the first Xen page to know if we 159 + * require a bounce buffer because the device doesn't support coherent 160 + * memory and we are not able to flush the cache. 161 + */ 162 + return (!hypercall_cflush && (xen_pfn != bfn) && 163 + !is_device_dma_coherent(dev)); 145 164 } 146 165 147 166 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+3 -3
arch/arm/xen/p2m.c
··· 93 93 for (i = 0; i < count; i++) { 94 94 if (map_ops[i].status) 95 95 continue; 96 - set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, 97 - map_ops[i].dev_bus_addr >> PAGE_SHIFT); 96 + set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, 97 + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT); 98 98 } 99 99 100 100 return 0; ··· 108 108 int i; 109 109 110 110 for (i = 0; i < count; i++) { 111 - set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, 111 + set_phys_to_machine(unmap_ops[i].host_addr >> XEN_PAGE_SHIFT, 112 112 INVALID_P2M_ENTRY); 113 113 } 114 114
+5
arch/x86/include/asm/xen/hypervisor.h
··· 57 57 } 58 58 #endif 59 59 60 + #ifdef CONFIG_HOTPLUG_CPU 61 + void xen_arch_register_cpu(int num); 62 + void xen_arch_unregister_cpu(int num); 63 + #endif 64 + 60 65 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
+5 -3
arch/x86/include/asm/xen/page.h
··· 12 12 #include <asm/pgtable.h> 13 13 14 14 #include <xen/interface/xen.h> 15 - #include <xen/grant_table.h> 15 + #include <xen/interface/grant_table.h> 16 16 #include <xen/features.h> 17 17 18 18 /* Xen machine address */ ··· 42 42 extern unsigned long *xen_p2m_addr; 43 43 extern unsigned long xen_p2m_size; 44 44 extern unsigned long xen_max_p2m_pfn; 45 + 46 + extern int xen_alloc_p2m_entry(unsigned long pfn); 45 47 46 48 extern unsigned long get_phys_to_machine(unsigned long pfn); 47 49 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); ··· 298 296 #define xen_unmap(cookie) iounmap((cookie)) 299 297 300 298 static inline bool xen_arch_need_swiotlb(struct device *dev, 301 - unsigned long pfn, 302 - unsigned long bfn) 299 + phys_addr_t phys, 300 + dma_addr_t dev_addr) 303 301 { 304 302 return false; 305 303 }
+15
arch/x86/xen/enlighten.c
··· 75 75 #include <asm/mwait.h> 76 76 #include <asm/pci_x86.h> 77 77 #include <asm/pat.h> 78 + #include <asm/cpu.h> 78 79 79 80 #ifdef CONFIG_ACPI 80 81 #include <linux/acpi.h> ··· 1900 1899 .set_cpu_features = xen_set_cpu_features, 1901 1900 }; 1902 1901 EXPORT_SYMBOL(x86_hyper_xen); 1902 + 1903 + #ifdef CONFIG_HOTPLUG_CPU 1904 + void xen_arch_register_cpu(int num) 1905 + { 1906 + arch_register_cpu(num); 1907 + } 1908 + EXPORT_SYMBOL(xen_arch_register_cpu); 1909 + 1910 + void xen_arch_unregister_cpu(int num) 1911 + { 1912 + arch_unregister_cpu(num); 1913 + } 1914 + EXPORT_SYMBOL(xen_arch_unregister_cpu); 1915 + #endif
+1 -1
arch/x86/xen/grant-table.c
··· 133 133 kfree(pages); 134 134 return -ENOMEM; 135 135 } 136 - rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); 136 + rc = alloc_xenballooned_pages(nr_grant_frames, pages); 137 137 if (rc) { 138 138 pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, 139 139 nr_grant_frames, rc);
+1
arch/x86/xen/mmu.c
··· 2888 2888 addr += range; 2889 2889 if (err_ptr) 2890 2890 err_ptr += batch; 2891 + cond_resched(); 2891 2892 } 2892 2893 out: 2893 2894
+13 -6
arch/x86/xen/p2m.c
··· 530 530 * the new pages are installed with cmpxchg; if we lose the race then 531 531 * simply free the page we allocated and use the one that's there. 532 532 */ 533 - static bool alloc_p2m(unsigned long pfn) 533 + int xen_alloc_p2m_entry(unsigned long pfn) 534 534 { 535 535 unsigned topidx; 536 536 unsigned long *top_mfn_p, *mid_mfn; ··· 540 540 unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); 541 541 unsigned long p2m_pfn; 542 542 543 + if (xen_feature(XENFEAT_auto_translated_physmap)) 544 + return 0; 545 + 543 546 ptep = lookup_address(addr, &level); 544 547 BUG_ON(!ptep || level != PG_LEVEL_4K); 545 548 pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); ··· 551 548 /* PMD level is missing, allocate a new one */ 552 549 ptep = alloc_p2m_pmd(addr, pte_pg); 553 550 if (!ptep) 554 - return false; 551 + return -ENOMEM; 555 552 } 556 553 557 554 if (p2m_top_mfn && pfn < MAX_P2M_PFN) { ··· 569 566 570 567 mid_mfn = alloc_p2m_page(); 571 568 if (!mid_mfn) 572 - return false; 569 + return -ENOMEM; 573 570 574 571 p2m_mid_mfn_init(mid_mfn, p2m_missing); 575 572 ··· 595 592 596 593 p2m = alloc_p2m_page(); 597 594 if (!p2m) 598 - return false; 595 + return -ENOMEM; 599 596 600 597 if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) 601 598 p2m_init(p2m); ··· 628 625 HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; 629 626 } 630 627 631 - return true; 628 + return 0; 632 629 } 630 + EXPORT_SYMBOL(xen_alloc_p2m_entry); 633 631 634 632 unsigned long __init set_phys_range_identity(unsigned long pfn_s, 635 633 unsigned long pfn_e) ··· 692 688 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 693 689 { 694 690 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 695 - if (!alloc_p2m(pfn)) 691 + int ret; 692 + 693 + ret = xen_alloc_p2m_entry(pfn); 694 + if (ret < 0) 696 695 return false; 697 696 698 697 return __set_phys_to_machine(pfn, mfn);
+6 -3
arch/x86/xen/setup.c
··· 212 212 e_pfn = PFN_DOWN(entry->addr + entry->size); 213 213 214 214 /* We only care about E820 after this */ 215 - if (e_pfn < *min_pfn) 215 + if (e_pfn <= *min_pfn) 216 216 continue; 217 217 218 218 s_pfn = PFN_UP(entry->addr); ··· 829 829 addr = xen_e820_map[0].addr; 830 830 size = xen_e820_map[0].size; 831 831 while (i < xen_e820_map_entries) { 832 + bool discard = false; 833 + 832 834 chunk_size = size; 833 835 type = xen_e820_map[i].type; 834 836 ··· 845 843 xen_add_extra_mem(pfn_s, n_pfns); 846 844 xen_max_p2m_pfn = pfn_s + n_pfns; 847 845 } else 848 - type = E820_UNUSABLE; 846 + discard = true; 849 847 } 850 848 851 - xen_align_and_add_e820_region(addr, chunk_size, type); 849 + if (!discard) 850 + xen_align_and_add_e820_region(addr, chunk_size, type); 852 851 853 852 addr += chunk_size; 854 853 size -= chunk_size;
+7 -6
drivers/block/xen-blkback/blkback.c
··· 87 87 * Maximum order of pages to be used for the shared ring between front and 88 88 * backend, 4KB page granularity is used. 89 89 */ 90 - unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; 90 + unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; 91 91 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); 92 92 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 93 93 /* ··· 961 961 seg[n].nsec = segments[i].last_sect - 962 962 segments[i].first_sect + 1; 963 963 seg[n].offset = (segments[i].first_sect << 9); 964 - if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) || 964 + if ((segments[i].last_sect >= (XEN_PAGE_SIZE >> 9)) || 965 965 (segments[i].last_sect < segments[i].first_sect)) { 966 966 rc = -EINVAL; 967 967 goto unmap; ··· 1210 1210 1211 1211 req_operation = req->operation == BLKIF_OP_INDIRECT ? 1212 1212 req->u.indirect.indirect_op : req->operation; 1213 + 1213 1214 if ((req->operation == BLKIF_OP_INDIRECT) && 1214 1215 (req_operation != BLKIF_OP_READ) && 1215 1216 (req_operation != BLKIF_OP_WRITE)) { ··· 1269 1268 seg[i].nsec = req->u.rw.seg[i].last_sect - 1270 1269 req->u.rw.seg[i].first_sect + 1; 1271 1270 seg[i].offset = (req->u.rw.seg[i].first_sect << 9); 1272 - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || 1271 + if ((req->u.rw.seg[i].last_sect >= (XEN_PAGE_SIZE >> 9)) || 1273 1272 (req->u.rw.seg[i].last_sect < 1274 1273 req->u.rw.seg[i].first_sect)) 1275 1274 goto fail_response; ··· 1446 1445 if (!xen_domain()) 1447 1446 return -ENODEV; 1448 1447 1449 - if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { 1448 + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { 1450 1449 pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", 1451 - xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); 1452 - xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; 1450 + xen_blkif_max_ring_order, 
XENBUS_MAX_RING_GRANT_ORDER); 1451 + xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; 1453 1452 } 1454 1453 1455 1454 rc = xen_blkif_interface_init();
+13 -4
drivers/block/xen-blkback/common.h
··· 39 39 #include <asm/pgalloc.h> 40 40 #include <asm/hypervisor.h> 41 41 #include <xen/grant_table.h> 42 + #include <xen/page.h> 42 43 #include <xen/xenbus.h> 43 44 #include <xen/interface/io/ring.h> 44 45 #include <xen/interface/io/blkif.h> ··· 52 51 */ 53 52 #define MAX_INDIRECT_SEGMENTS 256 54 53 55 - #define SEGS_PER_INDIRECT_FRAME \ 56 - (PAGE_SIZE/sizeof(struct blkif_request_segment)) 54 + /* 55 + * Xen use 4K pages. The guest may use different page size (4K or 64K) 56 + * Number of Xen pages per segment 57 + */ 58 + #define XEN_PAGES_PER_SEGMENT (PAGE_SIZE / XEN_PAGE_SIZE) 59 + 60 + #define XEN_PAGES_PER_INDIRECT_FRAME \ 61 + (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment)) 62 + #define SEGS_PER_INDIRECT_FRAME \ 63 + (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT) 64 + 57 65 #define MAX_INDIRECT_PAGES \ 58 66 ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 59 - #define INDIRECT_PAGES(_segs) \ 60 - ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 67 + #define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME) 61 68 62 69 /* Not a real protocol. Used to generate ring structs which contain 63 70 * the elements common to all protocols only. This way we get a
+7 -4
drivers/block/xen-blkback/xenbus.c
··· 176 176 { 177 177 struct blkif_sring *sring; 178 178 sring = (struct blkif_sring *)blkif->blk_ring; 179 - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); 179 + BACK_RING_INIT(&blkif->blk_rings.native, sring, 180 + XEN_PAGE_SIZE * nr_grefs); 180 181 break; 181 182 } 182 183 case BLKIF_PROTOCOL_X86_32: 183 184 { 184 185 struct blkif_x86_32_sring *sring_x86_32; 185 186 sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; 186 - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); 187 + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, 188 + XEN_PAGE_SIZE * nr_grefs); 187 189 break; 188 190 } 189 191 case BLKIF_PROTOCOL_X86_64: 190 192 { 191 193 struct blkif_x86_64_sring *sring_x86_64; 192 194 sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; 193 - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); 195 + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, 196 + XEN_PAGE_SIZE * nr_grefs); 194 197 break; 195 198 } 196 199 default: ··· 829 826 static int connect_ring(struct backend_info *be) 830 827 { 831 828 struct xenbus_device *dev = be->dev; 832 - unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; 829 + unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; 833 830 unsigned int evtchn, nr_grefs, ring_page_order; 834 831 unsigned int pers_grants; 835 832 char protocol[64] = "";
+364 -204
drivers/block/xen-blkfront.c
··· 68 68 69 69 struct grant { 70 70 grant_ref_t gref; 71 - unsigned long pfn; 71 + struct page *page; 72 72 struct list_head node; 73 73 }; 74 74 ··· 78 78 struct grant **grants_used; 79 79 struct grant **indirect_grants; 80 80 struct scatterlist *sg; 81 + unsigned int num_sg; 81 82 }; 82 83 83 84 struct split_bio { ··· 107 106 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); 108 107 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 109 108 110 - #define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) 111 - #define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) 109 + #define BLK_RING_SIZE(info) \ 110 + __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) 111 + 112 + #define BLK_MAX_RING_SIZE \ 113 + __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS) 114 + 112 115 /* 113 116 * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 114 117 * characters are enough. Define to 20 to keep consist with backend. 
··· 133 128 int vdevice; 134 129 blkif_vdev_t handle; 135 130 enum blkif_state connected; 136 - int ring_ref[XENBUS_MAX_RING_PAGES]; 131 + int ring_ref[XENBUS_MAX_RING_GRANTS]; 137 132 unsigned int nr_ring_pages; 138 133 struct blkif_front_ring ring; 139 134 unsigned int evtchn, irq; ··· 151 146 unsigned int discard_granularity; 152 147 unsigned int discard_alignment; 153 148 unsigned int feature_persistent:1; 149 + /* Number of 4KB segments handled */ 154 150 unsigned int max_indirect_segments; 155 151 int is_ready; 156 152 struct blk_mq_tag_set tag_set; ··· 180 174 181 175 #define DEV_NAME "xvd" /* name in /dev */ 182 176 183 - #define SEGS_PER_INDIRECT_FRAME \ 184 - (PAGE_SIZE/sizeof(struct blkif_request_segment)) 185 - #define INDIRECT_GREFS(_segs) \ 186 - ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) 177 + /* 178 + * Grants are always the same size as a Xen page (i.e 4KB). 179 + * A physical segment is always the same size as a Linux page. 180 + * Number of grants per physical segment 181 + */ 182 + #define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE) 183 + 184 + #define GRANTS_PER_INDIRECT_FRAME \ 185 + (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment)) 186 + 187 + #define PSEGS_PER_INDIRECT_FRAME \ 188 + (GRANTS_INDIRECT_FRAME / GRANTS_PSEGS) 189 + 190 + #define INDIRECT_GREFS(_grants) \ 191 + DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME) 192 + 193 + #define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG) 187 194 188 195 static int blkfront_setup_indirect(struct blkfront_info *info); 189 196 static int blkfront_gather_backend_features(struct blkfront_info *info); ··· 240 221 kfree(gnt_list_entry); 241 222 goto out_of_memory; 242 223 } 243 - gnt_list_entry->pfn = page_to_pfn(granted_page); 224 + gnt_list_entry->page = granted_page; 244 225 } 245 226 246 227 gnt_list_entry->gref = GRANT_INVALID_REF; ··· 255 236 &info->grants, node) { 256 237 list_del(&gnt_list_entry->node); 257 238 if (info->feature_persistent) 258 - 
__free_page(pfn_to_page(gnt_list_entry->pfn)); 239 + __free_page(gnt_list_entry->page); 259 240 kfree(gnt_list_entry); 260 241 i--; 261 242 } ··· 263 244 return -ENOMEM; 264 245 } 265 246 266 - static struct grant *get_grant(grant_ref_t *gref_head, 267 - unsigned long pfn, 268 - struct blkfront_info *info) 247 + static struct grant *get_free_grant(struct blkfront_info *info) 269 248 { 270 249 struct grant *gnt_list_entry; 271 - unsigned long buffer_gfn; 272 250 273 251 BUG_ON(list_empty(&info->grants)); 274 252 gnt_list_entry = list_first_entry(&info->grants, struct grant, 275 - node); 253 + node); 276 254 list_del(&gnt_list_entry->node); 277 255 278 - if (gnt_list_entry->gref != GRANT_INVALID_REF) { 256 + if (gnt_list_entry->gref != GRANT_INVALID_REF) 279 257 info->persistent_gnts_c--; 258 + 259 + return gnt_list_entry; 260 + } 261 + 262 + static inline void grant_foreign_access(const struct grant *gnt_list_entry, 263 + const struct blkfront_info *info) 264 + { 265 + gnttab_page_grant_foreign_access_ref_one(gnt_list_entry->gref, 266 + info->xbdev->otherend_id, 267 + gnt_list_entry->page, 268 + 0); 269 + } 270 + 271 + static struct grant *get_grant(grant_ref_t *gref_head, 272 + unsigned long gfn, 273 + struct blkfront_info *info) 274 + { 275 + struct grant *gnt_list_entry = get_free_grant(info); 276 + 277 + if (gnt_list_entry->gref != GRANT_INVALID_REF) 280 278 return gnt_list_entry; 279 + 280 + /* Assign a gref to this page */ 281 + gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 282 + BUG_ON(gnt_list_entry->gref == -ENOSPC); 283 + if (info->feature_persistent) 284 + grant_foreign_access(gnt_list_entry, info); 285 + else { 286 + /* Grant access to the GFN passed by the caller */ 287 + gnttab_grant_foreign_access_ref(gnt_list_entry->gref, 288 + info->xbdev->otherend_id, 289 + gfn, 0); 281 290 } 291 + 292 + return gnt_list_entry; 293 + } 294 + 295 + static struct grant *get_indirect_grant(grant_ref_t *gref_head, 296 + struct blkfront_info *info) 297 
+ { 298 + struct grant *gnt_list_entry = get_free_grant(info); 299 + 300 + if (gnt_list_entry->gref != GRANT_INVALID_REF) 301 + return gnt_list_entry; 282 302 283 303 /* Assign a gref to this page */ 284 304 gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 285 305 BUG_ON(gnt_list_entry->gref == -ENOSPC); 286 306 if (!info->feature_persistent) { 287 - BUG_ON(!pfn); 288 - gnt_list_entry->pfn = pfn; 307 + struct page *indirect_page; 308 + 309 + /* Fetch a pre-allocated page to use for indirect grefs */ 310 + BUG_ON(list_empty(&info->indirect_pages)); 311 + indirect_page = list_first_entry(&info->indirect_pages, 312 + struct page, lru); 313 + list_del(&indirect_page->lru); 314 + gnt_list_entry->page = indirect_page; 289 315 } 290 - buffer_gfn = pfn_to_gfn(gnt_list_entry->pfn); 291 - gnttab_grant_foreign_access_ref(gnt_list_entry->gref, 292 - info->xbdev->otherend_id, 293 - buffer_gfn, 0); 316 + grant_foreign_access(gnt_list_entry, info); 317 + 294 318 return gnt_list_entry; 295 319 } 296 320 ··· 456 394 return 0; 457 395 } 458 396 459 - /* 460 - * Generate a Xen blkfront IO request from a blk layer request. Reads 461 - * and writes are handled as expected. 462 - * 463 - * @req: a request struct 464 - */ 465 - static int blkif_queue_request(struct request *req) 397 + static int blkif_queue_discard_req(struct request *req) 466 398 { 467 399 struct blkfront_info *info = req->rq_disk->private_data; 468 400 struct blkif_request *ring_req; 469 401 unsigned long id; 402 + 403 + /* Fill out a communications ring structure. 
*/ 404 + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 405 + id = get_id_from_freelist(info); 406 + info->shadow[id].request = req; 407 + 408 + ring_req->operation = BLKIF_OP_DISCARD; 409 + ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 410 + ring_req->u.discard.id = id; 411 + ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); 412 + if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) 413 + ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; 414 + else 415 + ring_req->u.discard.flag = 0; 416 + 417 + info->ring.req_prod_pvt++; 418 + 419 + /* Keep a private copy so we can reissue requests when recovering. */ 420 + info->shadow[id].req = *ring_req; 421 + 422 + return 0; 423 + } 424 + 425 + struct setup_rw_req { 426 + unsigned int grant_idx; 427 + struct blkif_request_segment *segments; 428 + struct blkfront_info *info; 429 + struct blkif_request *ring_req; 430 + grant_ref_t gref_head; 431 + unsigned int id; 432 + /* Only used when persistent grant is used and it's a read request */ 433 + bool need_copy; 434 + unsigned int bvec_off; 435 + char *bvec_data; 436 + }; 437 + 438 + static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, 439 + unsigned int len, void *data) 440 + { 441 + struct setup_rw_req *setup = data; 442 + int n, ref; 443 + struct grant *gnt_list_entry; 470 444 unsigned int fsect, lsect; 471 - int i, ref, n; 472 - struct blkif_request_segment *segments = NULL; 445 + /* Convenient aliases */ 446 + unsigned int grant_idx = setup->grant_idx; 447 + struct blkif_request *ring_req = setup->ring_req; 448 + struct blkfront_info *info = setup->info; 449 + struct blk_shadow *shadow = &info->shadow[setup->id]; 450 + 451 + if ((ring_req->operation == BLKIF_OP_INDIRECT) && 452 + (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) { 453 + if (setup->segments) 454 + kunmap_atomic(setup->segments); 455 + 456 + n = grant_idx / GRANTS_PER_INDIRECT_FRAME; 457 + gnt_list_entry = 
get_indirect_grant(&setup->gref_head, info); 458 + shadow->indirect_grants[n] = gnt_list_entry; 459 + setup->segments = kmap_atomic(gnt_list_entry->page); 460 + ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; 461 + } 462 + 463 + gnt_list_entry = get_grant(&setup->gref_head, gfn, info); 464 + ref = gnt_list_entry->gref; 465 + shadow->grants_used[grant_idx] = gnt_list_entry; 466 + 467 + if (setup->need_copy) { 468 + void *shared_data; 469 + 470 + shared_data = kmap_atomic(gnt_list_entry->page); 471 + /* 472 + * this does not wipe data stored outside the 473 + * range sg->offset..sg->offset+sg->length. 474 + * Therefore, blkback *could* see data from 475 + * previous requests. This is OK as long as 476 + * persistent grants are shared with just one 477 + * domain. It may need refactoring if this 478 + * changes 479 + */ 480 + memcpy(shared_data + offset, 481 + setup->bvec_data + setup->bvec_off, 482 + len); 483 + 484 + kunmap_atomic(shared_data); 485 + setup->bvec_off += len; 486 + } 487 + 488 + fsect = offset >> 9; 489 + lsect = fsect + (len >> 9) - 1; 490 + if (ring_req->operation != BLKIF_OP_INDIRECT) { 491 + ring_req->u.rw.seg[grant_idx] = 492 + (struct blkif_request_segment) { 493 + .gref = ref, 494 + .first_sect = fsect, 495 + .last_sect = lsect }; 496 + } else { 497 + setup->segments[grant_idx % GRANTS_PER_INDIRECT_FRAME] = 498 + (struct blkif_request_segment) { 499 + .gref = ref, 500 + .first_sect = fsect, 501 + .last_sect = lsect }; 502 + } 503 + 504 + (setup->grant_idx)++; 505 + } 506 + 507 + static int blkif_queue_rw_req(struct request *req) 508 + { 509 + struct blkfront_info *info = req->rq_disk->private_data; 510 + struct blkif_request *ring_req; 511 + unsigned long id; 512 + int i; 513 + struct setup_rw_req setup = { 514 + .grant_idx = 0, 515 + .segments = NULL, 516 + .info = info, 517 + .need_copy = rq_data_dir(req) && info->feature_persistent, 518 + }; 473 519 474 520 /* 475 521 * Used to store if we are able to queue the request by just 
using ··· 585 415 * as there are not sufficiently many free. 586 416 */ 587 417 bool new_persistent_gnts; 588 - grant_ref_t gref_head; 589 - struct grant *gnt_list_entry = NULL; 590 418 struct scatterlist *sg; 591 - int nseg, max_grefs; 419 + int num_sg, max_grefs, num_grant; 592 420 593 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 594 - return 1; 595 - 596 - max_grefs = req->nr_phys_segments; 421 + max_grefs = req->nr_phys_segments * GRANTS_PER_PSEG; 597 422 if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST) 598 423 /* 599 424 * If we are using indirect segments we need to account 600 425 * for the indirect grefs used in the request. 601 426 */ 602 - max_grefs += INDIRECT_GREFS(req->nr_phys_segments); 427 + max_grefs += INDIRECT_GREFS(max_grefs); 603 428 604 429 /* Check if we have enough grants to allocate a requests */ 605 430 if (info->persistent_gnts_c < max_grefs) { 606 431 new_persistent_gnts = 1; 607 432 if (gnttab_alloc_grant_references( 608 433 max_grefs - info->persistent_gnts_c, 609 - &gref_head) < 0) { 434 + &setup.gref_head) < 0) { 610 435 gnttab_request_free_callback( 611 436 &info->callback, 612 437 blkif_restart_queue_callback, ··· 617 452 id = get_id_from_freelist(info); 618 453 info->shadow[id].request = req; 619 454 620 - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { 621 - ring_req->operation = BLKIF_OP_DISCARD; 622 - ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 623 - ring_req->u.discard.id = id; 624 - ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); 625 - if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) 626 - ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; 627 - else 628 - ring_req->u.discard.flag = 0; 455 + BUG_ON(info->max_indirect_segments == 0 && 456 + GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST); 457 + BUG_ON(info->max_indirect_segments && 458 + GREFS(req->nr_phys_segments) > info->max_indirect_segments); 459 + 460 + num_sg = blk_rq_map_sg(req->q, req, 
info->shadow[id].sg); 461 + num_grant = 0; 462 + /* Calculate the number of grant used */ 463 + for_each_sg(info->shadow[id].sg, sg, num_sg, i) 464 + num_grant += gnttab_count_grant(sg->offset, sg->length); 465 + 466 + ring_req->u.rw.id = id; 467 + info->shadow[id].num_sg = num_sg; 468 + if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 469 + /* 470 + * The indirect operation can only be a BLKIF_OP_READ or 471 + * BLKIF_OP_WRITE 472 + */ 473 + BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); 474 + ring_req->operation = BLKIF_OP_INDIRECT; 475 + ring_req->u.indirect.indirect_op = rq_data_dir(req) ? 476 + BLKIF_OP_WRITE : BLKIF_OP_READ; 477 + ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); 478 + ring_req->u.indirect.handle = info->handle; 479 + ring_req->u.indirect.nr_segments = num_grant; 629 480 } else { 630 - BUG_ON(info->max_indirect_segments == 0 && 631 - req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 632 - BUG_ON(info->max_indirect_segments && 633 - req->nr_phys_segments > info->max_indirect_segments); 634 - nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); 635 - ring_req->u.rw.id = id; 636 - if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 481 + ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 482 + ring_req->u.rw.handle = info->handle; 483 + ring_req->operation = rq_data_dir(req) ? 484 + BLKIF_OP_WRITE : BLKIF_OP_READ; 485 + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { 637 486 /* 638 - * The indirect operation can only be a BLKIF_OP_READ or 639 - * BLKIF_OP_WRITE 487 + * Ideally we can do an unordered flush-to-disk. 488 + * In case the backend onlysupports barriers, use that. 489 + * A barrier request a superset of FUA, so we can 490 + * implement it the same way. (It's also a FLUSH+FUA, 491 + * since it is guaranteed ordered WRT previous writes.) 
640 492 */ 641 - BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); 642 - ring_req->operation = BLKIF_OP_INDIRECT; 643 - ring_req->u.indirect.indirect_op = rq_data_dir(req) ? 644 - BLKIF_OP_WRITE : BLKIF_OP_READ; 645 - ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); 646 - ring_req->u.indirect.handle = info->handle; 647 - ring_req->u.indirect.nr_segments = nseg; 648 - } else { 649 - ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 650 - ring_req->u.rw.handle = info->handle; 651 - ring_req->operation = rq_data_dir(req) ? 652 - BLKIF_OP_WRITE : BLKIF_OP_READ; 653 - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { 654 - /* 655 - * Ideally we can do an unordered flush-to-disk. In case the 656 - * backend onlysupports barriers, use that. A barrier request 657 - * a superset of FUA, so we can implement it the same 658 - * way. (It's also a FLUSH+FUA, since it is 659 - * guaranteed ordered WRT previous writes.) 660 - */ 661 - switch (info->feature_flush & 662 - ((REQ_FLUSH|REQ_FUA))) { 663 - case REQ_FLUSH|REQ_FUA: 664 - ring_req->operation = 665 - BLKIF_OP_WRITE_BARRIER; 666 - break; 667 - case REQ_FLUSH: 668 - ring_req->operation = 669 - BLKIF_OP_FLUSH_DISKCACHE; 670 - break; 671 - default: 672 - ring_req->operation = 0; 673 - } 674 - } 675 - ring_req->u.rw.nr_segments = nseg; 676 - } 677 - for_each_sg(info->shadow[id].sg, sg, nseg, i) { 678 - fsect = sg->offset >> 9; 679 - lsect = fsect + (sg->length >> 9) - 1; 680 - 681 - if ((ring_req->operation == BLKIF_OP_INDIRECT) && 682 - (i % SEGS_PER_INDIRECT_FRAME == 0)) { 683 - unsigned long uninitialized_var(pfn); 684 - 685 - if (segments) 686 - kunmap_atomic(segments); 687 - 688 - n = i / SEGS_PER_INDIRECT_FRAME; 689 - if (!info->feature_persistent) { 690 - struct page *indirect_page; 691 - 692 - /* Fetch a pre-allocated page to use for indirect grefs */ 693 - BUG_ON(list_empty(&info->indirect_pages)); 694 - indirect_page = list_first_entry(&info->indirect_pages, 695 - struct page, lru); 696 - 
list_del(&indirect_page->lru); 697 - pfn = page_to_pfn(indirect_page); 698 - } 699 - gnt_list_entry = get_grant(&gref_head, pfn, info); 700 - info->shadow[id].indirect_grants[n] = gnt_list_entry; 701 - segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); 702 - ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; 703 - } 704 - 705 - gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info); 706 - ref = gnt_list_entry->gref; 707 - 708 - info->shadow[id].grants_used[i] = gnt_list_entry; 709 - 710 - if (rq_data_dir(req) && info->feature_persistent) { 711 - char *bvec_data; 712 - void *shared_data; 713 - 714 - BUG_ON(sg->offset + sg->length > PAGE_SIZE); 715 - 716 - shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); 717 - bvec_data = kmap_atomic(sg_page(sg)); 718 - 719 - /* 720 - * this does not wipe data stored outside the 721 - * range sg->offset..sg->offset+sg->length. 722 - * Therefore, blkback *could* see data from 723 - * previous requests. This is OK as long as 724 - * persistent grants are shared with just one 725 - * domain. 
It may need refactoring if this 726 - * changes 727 - */ 728 - memcpy(shared_data + sg->offset, 729 - bvec_data + sg->offset, 730 - sg->length); 731 - 732 - kunmap_atomic(bvec_data); 733 - kunmap_atomic(shared_data); 734 - } 735 - if (ring_req->operation != BLKIF_OP_INDIRECT) { 736 - ring_req->u.rw.seg[i] = 737 - (struct blkif_request_segment) { 738 - .gref = ref, 739 - .first_sect = fsect, 740 - .last_sect = lsect }; 741 - } else { 742 - n = i % SEGS_PER_INDIRECT_FRAME; 743 - segments[n] = 744 - (struct blkif_request_segment) { 745 - .gref = ref, 746 - .first_sect = fsect, 747 - .last_sect = lsect }; 493 + switch (info->feature_flush & 494 + ((REQ_FLUSH|REQ_FUA))) { 495 + case REQ_FLUSH|REQ_FUA: 496 + ring_req->operation = 497 + BLKIF_OP_WRITE_BARRIER; 498 + break; 499 + case REQ_FLUSH: 500 + ring_req->operation = 501 + BLKIF_OP_FLUSH_DISKCACHE; 502 + break; 503 + default: 504 + ring_req->operation = 0; 748 505 } 749 506 } 750 - if (segments) 751 - kunmap_atomic(segments); 507 + ring_req->u.rw.nr_segments = num_grant; 752 508 } 509 + 510 + setup.ring_req = ring_req; 511 + setup.id = id; 512 + for_each_sg(info->shadow[id].sg, sg, num_sg, i) { 513 + BUG_ON(sg->offset + sg->length > PAGE_SIZE); 514 + 515 + if (setup.need_copy) { 516 + setup.bvec_off = sg->offset; 517 + setup.bvec_data = kmap_atomic(sg_page(sg)); 518 + } 519 + 520 + gnttab_foreach_grant_in_range(sg_page(sg), 521 + sg->offset, 522 + sg->length, 523 + blkif_setup_rw_req_grant, 524 + &setup); 525 + 526 + if (setup.need_copy) 527 + kunmap_atomic(setup.bvec_data); 528 + } 529 + if (setup.segments) 530 + kunmap_atomic(setup.segments); 753 531 754 532 info->ring.req_prod_pvt++; 755 533 ··· 700 592 info->shadow[id].req = *ring_req; 701 593 702 594 if (new_persistent_gnts) 703 - gnttab_free_grant_references(gref_head); 595 + gnttab_free_grant_references(setup.gref_head); 704 596 705 597 return 0; 706 598 } 707 599 600 + /* 601 + * Generate a Xen blkfront IO request from a blk layer request. 
Reads 602 + * and writes are handled as expected. 603 + * 604 + * @req: a request struct 605 + */ 606 + static int blkif_queue_request(struct request *req) 607 + { 608 + struct blkfront_info *info = req->rq_disk->private_data; 609 + 610 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 611 + return 1; 612 + 613 + if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) 614 + return blkif_queue_discard_req(req); 615 + else 616 + return blkif_queue_rw_req(req); 617 + } 708 618 709 619 static inline void flush_requests(struct blkfront_info *info) 710 620 { ··· 817 691 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 818 692 blk_queue_logical_block_size(rq, sector_size); 819 693 blk_queue_physical_block_size(rq, physical_sector_size); 820 - blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512); 694 + blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); 821 695 822 696 /* Each segment in a request is up to an aligned page in size. */ 823 697 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 824 698 blk_queue_max_segment_size(rq, PAGE_SIZE); 825 699 826 700 /* Ensure a merged request will fit in a single I/O ring slot. */ 827 - blk_queue_max_segments(rq, segments); 701 + blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); 828 702 829 703 /* Make sure buffer addresses are sector-aligned. 
*/ 830 704 blk_queue_dma_alignment(rq, 511); ··· 1098 972 info->persistent_gnts_c--; 1099 973 } 1100 974 if (info->feature_persistent) 1101 - __free_page(pfn_to_page(persistent_gnt->pfn)); 975 + __free_page(persistent_gnt->page); 1102 976 kfree(persistent_gnt); 1103 977 } 1104 978 } ··· 1133 1007 persistent_gnt = info->shadow[i].grants_used[j]; 1134 1008 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 1135 1009 if (info->feature_persistent) 1136 - __free_page(pfn_to_page(persistent_gnt->pfn)); 1010 + __free_page(persistent_gnt->page); 1137 1011 kfree(persistent_gnt); 1138 1012 } 1139 1013 ··· 1147 1021 for (j = 0; j < INDIRECT_GREFS(segs); j++) { 1148 1022 persistent_gnt = info->shadow[i].indirect_grants[j]; 1149 1023 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 1150 - __free_page(pfn_to_page(persistent_gnt->pfn)); 1024 + __free_page(persistent_gnt->page); 1151 1025 kfree(persistent_gnt); 1152 1026 } 1153 1027 ··· 1183 1057 1184 1058 } 1185 1059 1060 + struct copy_from_grant { 1061 + const struct blk_shadow *s; 1062 + unsigned int grant_idx; 1063 + unsigned int bvec_offset; 1064 + char *bvec_data; 1065 + }; 1066 + 1067 + static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset, 1068 + unsigned int len, void *data) 1069 + { 1070 + struct copy_from_grant *info = data; 1071 + char *shared_data; 1072 + /* Convenient aliases */ 1073 + const struct blk_shadow *s = info->s; 1074 + 1075 + shared_data = kmap_atomic(s->grants_used[info->grant_idx]->page); 1076 + 1077 + memcpy(info->bvec_data + info->bvec_offset, 1078 + shared_data + offset, len); 1079 + 1080 + info->bvec_offset += len; 1081 + info->grant_idx++; 1082 + 1083 + kunmap_atomic(shared_data); 1084 + } 1085 + 1186 1086 static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, 1187 1087 struct blkif_response *bret) 1188 1088 { 1189 1089 int i = 0; 1190 1090 struct scatterlist *sg; 1191 - char *bvec_data; 1192 - void *shared_data; 1193 - int nseg; 1091 + int 
num_sg, num_grant; 1092 + struct copy_from_grant data = { 1093 + .s = s, 1094 + .grant_idx = 0, 1095 + }; 1194 1096 1195 - nseg = s->req.operation == BLKIF_OP_INDIRECT ? 1097 + num_grant = s->req.operation == BLKIF_OP_INDIRECT ? 1196 1098 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; 1099 + num_sg = s->num_sg; 1197 1100 1198 1101 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { 1199 - for_each_sg(s->sg, sg, nseg, i) { 1102 + for_each_sg(s->sg, sg, num_sg, i) { 1200 1103 BUG_ON(sg->offset + sg->length > PAGE_SIZE); 1201 - shared_data = kmap_atomic( 1202 - pfn_to_page(s->grants_used[i]->pfn)); 1203 - bvec_data = kmap_atomic(sg_page(sg)); 1204 - memcpy(bvec_data + sg->offset, 1205 - shared_data + sg->offset, 1206 - sg->length); 1207 - kunmap_atomic(bvec_data); 1208 - kunmap_atomic(shared_data); 1104 + 1105 + data.bvec_offset = sg->offset; 1106 + data.bvec_data = kmap_atomic(sg_page(sg)); 1107 + 1108 + gnttab_foreach_grant_in_range(sg_page(sg), 1109 + sg->offset, 1110 + sg->length, 1111 + blkif_copy_from_grant, 1112 + &data); 1113 + 1114 + kunmap_atomic(data.bvec_data); 1209 1115 } 1210 1116 } 1211 1117 /* Add the persistent grant into the list of free grants */ 1212 - for (i = 0; i < nseg; i++) { 1118 + for (i = 0; i < num_grant; i++) { 1213 1119 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { 1214 1120 /* 1215 1121 * If the grant is still mapped by the backend (the ··· 1267 1109 } 1268 1110 } 1269 1111 if (s->req.operation == BLKIF_OP_INDIRECT) { 1270 - for (i = 0; i < INDIRECT_GREFS(nseg); i++) { 1112 + for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { 1271 1113 if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { 1272 1114 if (!info->feature_persistent) 1273 1115 pr_alert_ratelimited("backed has not unmapped grant: %u\n", ··· 1283 1125 * available pages for indirect grefs. 
1284 1126 */ 1285 1127 if (!info->feature_persistent) { 1286 - indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); 1128 + indirect_page = s->indirect_grants[i]->page; 1287 1129 list_add(&indirect_page->lru, &info->indirect_pages); 1288 1130 } 1289 1131 s->indirect_grants[i]->gref = GRANT_INVALID_REF; ··· 1412 1254 { 1413 1255 struct blkif_sring *sring; 1414 1256 int err, i; 1415 - unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; 1416 - grant_ref_t gref[XENBUS_MAX_RING_PAGES]; 1257 + unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE; 1258 + grant_ref_t gref[XENBUS_MAX_RING_GRANTS]; 1417 1259 1418 1260 for (i = 0; i < info->nr_ring_pages; i++) 1419 1261 info->ring_ref[i] = GRANT_INVALID_REF; ··· 1741 1583 atomic_set(&split_bio->pending, pending); 1742 1584 split_bio->bio = bio; 1743 1585 for (i = 0; i < pending; i++) { 1744 - offset = (i * segs * PAGE_SIZE) >> 9; 1745 - size = min((unsigned int)(segs * PAGE_SIZE) >> 9, 1586 + offset = (i * segs * XEN_PAGE_SIZE) >> 9; 1587 + size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9, 1746 1588 (unsigned int)bio_sectors(bio) - offset); 1747 1589 cloned_bio = bio_clone(bio, GFP_NOIO); 1748 1590 BUG_ON(cloned_bio == NULL); ··· 1853 1695 1854 1696 static int blkfront_setup_indirect(struct blkfront_info *info) 1855 1697 { 1856 - unsigned int segs; 1698 + unsigned int psegs, grants; 1857 1699 int err, i; 1858 1700 1859 1701 if (info->max_indirect_segments == 0) 1860 - segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1702 + grants = BLKIF_MAX_SEGMENTS_PER_REQUEST; 1861 1703 else 1862 - segs = info->max_indirect_segments; 1704 + grants = info->max_indirect_segments; 1705 + psegs = grants / GRANTS_PER_PSEG; 1863 1706 1864 - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); 1707 + err = fill_grant_buffer(info, 1708 + (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info)); 1865 1709 if (err) 1866 1710 goto out_of_memory; 1867 1711 ··· 1873 1713 * grants, we need to allocate a set of 
pages that can be 1874 1714 * used for mapping indirect grefs 1875 1715 */ 1876 - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); 1716 + int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); 1877 1717 1878 1718 BUG_ON(!list_empty(&info->indirect_pages)); 1879 1719 for (i = 0; i < num; i++) { ··· 1886 1726 1887 1727 for (i = 0; i < BLK_RING_SIZE(info); i++) { 1888 1728 info->shadow[i].grants_used = kzalloc( 1889 - sizeof(info->shadow[i].grants_used[0]) * segs, 1729 + sizeof(info->shadow[i].grants_used[0]) * grants, 1890 1730 GFP_NOIO); 1891 - info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO); 1731 + info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO); 1892 1732 if (info->max_indirect_segments) 1893 1733 info->shadow[i].indirect_grants = kzalloc( 1894 1734 sizeof(info->shadow[i].indirect_grants[0]) * 1895 - INDIRECT_GREFS(segs), 1735 + INDIRECT_GREFS(grants), 1896 1736 GFP_NOIO); 1897 1737 if ((info->shadow[i].grants_used == NULL) || 1898 1738 (info->shadow[i].sg == NULL) || 1899 1739 (info->max_indirect_segments && 1900 1740 (info->shadow[i].indirect_grants == NULL))) 1901 1741 goto out_of_memory; 1902 - sg_init_table(info->shadow[i].sg, segs); 1742 + sg_init_table(info->shadow[i].sg, psegs); 1903 1743 } 1904 1744 1905 1745 ··· 2285 2125 if (!xen_domain()) 2286 2126 return -ENODEV; 2287 2127 2288 - if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { 2128 + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { 2289 2129 pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", 2290 - xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); 2130 + xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER); 2291 2131 xen_blkif_max_ring_order = 0; 2292 2132 } 2293 2133
+11 -5
drivers/net/xen-netback/common.h
··· 44 44 #include <xen/interface/grant_table.h> 45 45 #include <xen/grant_table.h> 46 46 #include <xen/xenbus.h> 47 + #include <xen/page.h> 47 48 #include <linux/debugfs.h> 48 49 49 50 typedef unsigned int pending_ring_idx_t; ··· 65 64 struct ubuf_info callback_struct; 66 65 }; 67 66 68 - #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) 69 - #define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) 67 + #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE) 68 + #define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE) 70 69 71 70 struct xenvif_rx_meta { 72 71 int id; ··· 81 80 /* Discriminate from any valid pending_idx value. */ 82 81 #define INVALID_PENDING_IDX 0xFFFF 83 82 84 - #define MAX_BUFFER_OFFSET PAGE_SIZE 83 + #define MAX_BUFFER_OFFSET XEN_PAGE_SIZE 85 84 86 85 #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE 87 86 87 + /* The maximum number of frags is derived from the size of a grant (same 88 + * as a Xen page size for now). 89 + */ 90 + #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) 91 + 88 92 /* It's possible for an skb to have a maximal number of frags 89 93 * but still be less than MAX_BUFFER_OFFSET in size. Thus the 90 - * worst-case number of copy operations is MAX_SKB_FRAGS per 94 + * worst-case number of copy operations is MAX_XEN_SKB_FRAGS per 91 95 * ring slot. 92 96 */ 93 - #define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) 97 + #define MAX_GRANT_COPY_OPS (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) 94 98 95 99 #define NETBACK_INVALID_HANDLE -1 96 100
+105 -62
drivers/net/xen-netback/netback.c
··· 152 152 static int xenvif_rx_ring_slots_needed(struct xenvif *vif) 153 153 { 154 154 if (vif->gso_mask) 155 - return DIV_ROUND_UP(vif->dev->gso_max_size, PAGE_SIZE) + 1; 155 + return DIV_ROUND_UP(vif->dev->gso_max_size, XEN_PAGE_SIZE) + 1; 156 156 else 157 - return DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); 157 + return DIV_ROUND_UP(vif->dev->mtu, XEN_PAGE_SIZE); 158 158 } 159 159 160 160 static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) ··· 274 274 return meta; 275 275 } 276 276 277 + struct gop_frag_copy { 278 + struct xenvif_queue *queue; 279 + struct netrx_pending_operations *npo; 280 + struct xenvif_rx_meta *meta; 281 + int head; 282 + int gso_type; 283 + 284 + struct page *page; 285 + }; 286 + 287 + static void xenvif_setup_copy_gop(unsigned long gfn, 288 + unsigned int offset, 289 + unsigned int *len, 290 + struct gop_frag_copy *info) 291 + { 292 + struct gnttab_copy *copy_gop; 293 + struct xen_page_foreign *foreign; 294 + /* Convenient aliases */ 295 + struct xenvif_queue *queue = info->queue; 296 + struct netrx_pending_operations *npo = info->npo; 297 + struct page *page = info->page; 298 + 299 + BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 300 + 301 + if (npo->copy_off == MAX_BUFFER_OFFSET) 302 + info->meta = get_next_rx_buffer(queue, npo); 303 + 304 + if (npo->copy_off + *len > MAX_BUFFER_OFFSET) 305 + *len = MAX_BUFFER_OFFSET - npo->copy_off; 306 + 307 + copy_gop = npo->copy + npo->copy_prod++; 308 + copy_gop->flags = GNTCOPY_dest_gref; 309 + copy_gop->len = *len; 310 + 311 + foreign = xen_page_foreign(page); 312 + if (foreign) { 313 + copy_gop->source.domid = foreign->domid; 314 + copy_gop->source.u.ref = foreign->gref; 315 + copy_gop->flags |= GNTCOPY_source_gref; 316 + } else { 317 + copy_gop->source.domid = DOMID_SELF; 318 + copy_gop->source.u.gmfn = gfn; 319 + } 320 + copy_gop->source.offset = offset; 321 + 322 + copy_gop->dest.domid = queue->vif->domid; 323 + copy_gop->dest.offset = npo->copy_off; 324 + copy_gop->dest.u.ref = 
npo->copy_gref; 325 + 326 + npo->copy_off += *len; 327 + info->meta->size += *len; 328 + 329 + /* Leave a gap for the GSO descriptor. */ 330 + if (info->head && ((1 << info->gso_type) & queue->vif->gso_mask)) 331 + queue->rx.req_cons++; 332 + 333 + info->head = 0; /* There must be something in this buffer now */ 334 + } 335 + 336 + static void xenvif_gop_frag_copy_grant(unsigned long gfn, 337 + unsigned offset, 338 + unsigned int len, 339 + void *data) 340 + { 341 + unsigned int bytes; 342 + 343 + while (len) { 344 + bytes = len; 345 + xenvif_setup_copy_gop(gfn, offset, &bytes, data); 346 + offset += bytes; 347 + len -= bytes; 348 + } 349 + } 350 + 277 351 /* 278 352 * Set up the grant operations for this fragment. If it's a flipping 279 353 * interface, we also set up the unmap request from here. ··· 357 283 struct page *page, unsigned long size, 358 284 unsigned long offset, int *head) 359 285 { 360 - struct gnttab_copy *copy_gop; 361 - struct xenvif_rx_meta *meta; 286 + struct gop_frag_copy info = { 287 + .queue = queue, 288 + .npo = npo, 289 + .head = *head, 290 + .gso_type = XEN_NETIF_GSO_TYPE_NONE, 291 + }; 362 292 unsigned long bytes; 363 - int gso_type = XEN_NETIF_GSO_TYPE_NONE; 293 + 294 + if (skb_is_gso(skb)) { 295 + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) 296 + info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 297 + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 298 + info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 299 + } 364 300 365 301 /* Data must not cross a page boundary. 
*/ 366 302 BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); 367 303 368 - meta = npo->meta + npo->meta_prod - 1; 304 + info.meta = npo->meta + npo->meta_prod - 1; 369 305 370 306 /* Skip unused frames from start of page */ 371 307 page += offset >> PAGE_SHIFT; 372 308 offset &= ~PAGE_MASK; 373 309 374 310 while (size > 0) { 375 - struct xen_page_foreign *foreign; 376 - 377 311 BUG_ON(offset >= PAGE_SIZE); 378 - BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 379 - 380 - if (npo->copy_off == MAX_BUFFER_OFFSET) 381 - meta = get_next_rx_buffer(queue, npo); 382 312 383 313 bytes = PAGE_SIZE - offset; 384 314 if (bytes > size) 385 315 bytes = size; 386 316 387 - if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) 388 - bytes = MAX_BUFFER_OFFSET - npo->copy_off; 389 - 390 - copy_gop = npo->copy + npo->copy_prod++; 391 - copy_gop->flags = GNTCOPY_dest_gref; 392 - copy_gop->len = bytes; 393 - 394 - foreign = xen_page_foreign(page); 395 - if (foreign) { 396 - copy_gop->source.domid = foreign->domid; 397 - copy_gop->source.u.ref = foreign->gref; 398 - copy_gop->flags |= GNTCOPY_source_gref; 399 - } else { 400 - copy_gop->source.domid = DOMID_SELF; 401 - copy_gop->source.u.gmfn = 402 - virt_to_gfn(page_address(page)); 403 - } 404 - copy_gop->source.offset = offset; 405 - 406 - copy_gop->dest.domid = queue->vif->domid; 407 - copy_gop->dest.offset = npo->copy_off; 408 - copy_gop->dest.u.ref = npo->copy_gref; 409 - 410 - npo->copy_off += bytes; 411 - meta->size += bytes; 412 - 413 - offset += bytes; 317 + info.page = page; 318 + gnttab_foreach_grant_in_range(page, offset, bytes, 319 + xenvif_gop_frag_copy_grant, 320 + &info); 414 321 size -= bytes; 322 + offset = 0; 415 323 416 - /* Next frame */ 417 - if (offset == PAGE_SIZE && size) { 324 + /* Next page */ 325 + if (size) { 418 326 BUG_ON(!PageCompound(page)); 419 327 page++; 420 - offset = 0; 421 328 } 422 - 423 - /* Leave a gap for the GSO descriptor. 
*/ 424 - if (skb_is_gso(skb)) { 425 - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) 426 - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; 427 - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) 428 - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; 429 - } 430 - 431 - if (*head && ((1 << gso_type) & queue->vif->gso_mask)) 432 - queue->rx.req_cons++; 433 - 434 - *head = 0; /* There must be something in this buffer now. */ 435 - 436 329 } 330 + 331 + *head = info.head; 437 332 } 438 333 439 334 /* ··· 801 758 first->size -= txp->size; 802 759 slots++; 803 760 804 - if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { 761 + if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) { 805 762 netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n", 806 763 txp->offset, txp->size); 807 764 xenvif_fatal_tx_err(queue->vif); ··· 1382 1339 } 1383 1340 1384 1341 /* No crossing a page as the payload mustn't fragment. */ 1385 - if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { 1342 + if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) { 1386 1343 netdev_err(queue->vif->dev, 1387 1344 "txreq.offset: %u, size: %u, end: %lu\n", 1388 1345 txreq.offset, txreq.size, 1389 - (unsigned long)(txreq.offset&~PAGE_MASK) + txreq.size); 1346 + (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size); 1390 1347 xenvif_fatal_tx_err(queue->vif); 1391 1348 break; 1392 1349 } ··· 1452 1409 virt_to_gfn(skb->data); 1453 1410 queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; 1454 1411 queue->tx_copy_ops[*copy_ops].dest.offset = 1455 - offset_in_page(skb->data); 1412 + offset_in_page(skb->data) & ~XEN_PAGE_MASK; 1456 1413 1457 1414 queue->tx_copy_ops[*copy_ops].len = data_len; 1458 1415 queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; ··· 1937 1894 goto err; 1938 1895 1939 1896 txs = (struct xen_netif_tx_sring *)addr; 1940 - BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE); 1897 + BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE); 1941 1898 1942 1899 err = 
xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif), 1943 1900 &rx_ring_ref, 1, &addr); ··· 1945 1902 goto err; 1946 1903 1947 1904 rxs = (struct xen_netif_rx_sring *)addr; 1948 - BACK_RING_INIT(&queue->rx, rxs, PAGE_SIZE); 1905 + BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); 1949 1906 1950 1907 return 0; 1951 1908
+86 -36
drivers/net/xen-netfront.c
··· 74 74 75 75 #define GRANT_INVALID_REF 0 76 76 77 - #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) 78 - #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) 77 + #define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE) 78 + #define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE) 79 79 80 80 /* Minimum number of Rx slots (includes slot for GSO metadata). */ 81 81 #define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) ··· 291 291 struct sk_buff *skb; 292 292 unsigned short id; 293 293 grant_ref_t ref; 294 - unsigned long gfn; 294 + struct page *page; 295 295 struct xen_netif_rx_request *req; 296 296 297 297 skb = xennet_alloc_one_rx_buffer(queue); ··· 307 307 BUG_ON((signed short)ref < 0); 308 308 queue->grant_rx_ref[id] = ref; 309 309 310 - gfn = xen_page_to_gfn(skb_frag_page(&skb_shinfo(skb)->frags[0])); 310 + page = skb_frag_page(&skb_shinfo(skb)->frags[0]); 311 311 312 312 req = RING_GET_REQUEST(&queue->rx, req_prod); 313 - gnttab_grant_foreign_access_ref(ref, 314 - queue->info->xbdev->otherend_id, 315 - gfn, 316 - 0); 317 - 313 + gnttab_page_grant_foreign_access_ref_one(ref, 314 + queue->info->xbdev->otherend_id, 315 + page, 316 + 0); 318 317 req->id = id; 319 318 req->gref = ref; 320 319 } ··· 414 415 xennet_maybe_wake_tx(queue); 415 416 } 416 417 417 - static struct xen_netif_tx_request *xennet_make_one_txreq( 418 - struct netfront_queue *queue, struct sk_buff *skb, 419 - struct page *page, unsigned int offset, unsigned int len) 418 + struct xennet_gnttab_make_txreq { 419 + struct netfront_queue *queue; 420 + struct sk_buff *skb; 421 + struct page *page; 422 + struct xen_netif_tx_request *tx; /* Last request */ 423 + unsigned int size; 424 + }; 425 + 426 + static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, 427 + unsigned int len, void *data) 420 428 { 429 + struct xennet_gnttab_make_txreq *info = data; 421 430 unsigned int id; 422 431 struct xen_netif_tx_request *tx; 
423 432 grant_ref_t ref; 424 - 425 - len = min_t(unsigned int, PAGE_SIZE - offset, len); 433 + /* convenient aliases */ 434 + struct page *page = info->page; 435 + struct netfront_queue *queue = info->queue; 436 + struct sk_buff *skb = info->skb; 426 437 427 438 id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); 428 439 tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); 429 440 ref = gnttab_claim_grant_reference(&queue->gref_tx_head); 430 441 BUG_ON((signed short)ref < 0); 431 442 432 - gnttab_grant_foreign_access_ref(ref, 433 - queue->info->xbdev->otherend_id, 434 - xen_page_to_gfn(page), 435 - GNTMAP_readonly); 443 + gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, 444 + gfn, GNTMAP_readonly); 436 445 437 446 queue->tx_skbs[id].skb = skb; 438 447 queue->grant_tx_page[id] = page; ··· 452 445 tx->size = len; 453 446 tx->flags = 0; 454 447 455 - return tx; 448 + info->tx = tx; 449 + info->size += tx->size; 450 + } 451 + 452 + static struct xen_netif_tx_request *xennet_make_first_txreq( 453 + struct netfront_queue *queue, struct sk_buff *skb, 454 + struct page *page, unsigned int offset, unsigned int len) 455 + { 456 + struct xennet_gnttab_make_txreq info = { 457 + .queue = queue, 458 + .skb = skb, 459 + .page = page, 460 + .size = 0, 461 + }; 462 + 463 + gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info); 464 + 465 + return info.tx; 466 + } 467 + 468 + static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, 469 + unsigned int len, void *data) 470 + { 471 + struct xennet_gnttab_make_txreq *info = data; 472 + 473 + info->tx->flags |= XEN_NETTXF_more_data; 474 + skb_get(info->skb); 475 + xennet_tx_setup_grant(gfn, offset, len, data); 456 476 } 457 477 458 478 static struct xen_netif_tx_request *xennet_make_txreqs( ··· 487 453 struct sk_buff *skb, struct page *page, 488 454 unsigned int offset, unsigned int len) 489 455 { 456 + struct xennet_gnttab_make_txreq info = { 457 + .queue = 
queue, 458 + .skb = skb, 459 + .tx = tx, 460 + }; 461 + 490 462 /* Skip unused frames from start of page */ 491 463 page += offset >> PAGE_SHIFT; 492 464 offset &= ~PAGE_MASK; 493 465 494 466 while (len) { 495 - tx->flags |= XEN_NETTXF_more_data; 496 - tx = xennet_make_one_txreq(queue, skb_get(skb), 497 - page, offset, len); 467 + info.page = page; 468 + info.size = 0; 469 + 470 + gnttab_foreach_grant_in_range(page, offset, len, 471 + xennet_make_one_txreq, 472 + &info); 473 + 498 474 page++; 499 475 offset = 0; 500 - len -= tx->size; 476 + len -= info.size; 501 477 } 502 478 503 - return tx; 479 + return info.tx; 504 480 } 505 481 506 482 /* ··· 520 476 static int xennet_count_skb_slots(struct sk_buff *skb) 521 477 { 522 478 int i, frags = skb_shinfo(skb)->nr_frags; 523 - int pages; 479 + int slots; 524 480 525 - pages = PFN_UP(offset_in_page(skb->data) + skb_headlen(skb)); 481 + slots = gnttab_count_grant(offset_in_page(skb->data), 482 + skb_headlen(skb)); 526 483 527 484 for (i = 0; i < frags; i++) { 528 485 skb_frag_t *frag = skb_shinfo(skb)->frags + i; ··· 533 488 /* Skip unused frames from start of page */ 534 489 offset &= ~PAGE_MASK; 535 490 536 - pages += PFN_UP(offset + size); 491 + slots += gnttab_count_grant(offset, size); 537 492 } 538 493 539 - return pages; 494 + return slots; 540 495 } 541 496 542 497 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, ··· 556 511 557 512 return queue_idx; 558 513 } 514 + 515 + #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) 559 516 560 517 static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) 561 518 { ··· 593 546 } 594 547 595 548 slots = xennet_count_skb_slots(skb); 596 - if (unlikely(slots > MAX_SKB_FRAGS + 1)) { 549 + if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) { 597 550 net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n", 598 551 slots, skb->len); 599 552 if (skb_linearize(skb)) ··· 614 567 } 615 568 616 569 /* First request for the linear 
area. */ 617 - first_tx = tx = xennet_make_one_txreq(queue, skb, 618 - page, offset, len); 619 - page++; 620 - offset = 0; 570 + first_tx = tx = xennet_make_first_txreq(queue, skb, 571 + page, offset, len); 572 + offset += tx->size; 573 + if (offset == PAGE_SIZE) { 574 + page++; 575 + offset = 0; 576 + } 621 577 len -= tx->size; 622 578 623 579 if (skb->ip_summed == CHECKSUM_PARTIAL) ··· 782 732 783 733 for (;;) { 784 734 if (unlikely(rx->status < 0 || 785 - rx->offset + rx->status > PAGE_SIZE)) { 735 + rx->offset + rx->status > XEN_PAGE_SIZE)) { 786 736 if (net_ratelimit()) 787 737 dev_warn(dev, "rx->offset: %u, size: %d\n", 788 738 rx->offset, rx->status); ··· 1546 1496 goto fail; 1547 1497 } 1548 1498 SHARED_RING_INIT(txs); 1549 - FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE); 1499 + FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE); 1550 1500 1551 1501 err = xenbus_grant_ring(dev, txs, 1, &gref); 1552 1502 if (err < 0) ··· 1560 1510 goto alloc_rx_ring_fail; 1561 1511 } 1562 1512 SHARED_RING_INIT(rxs); 1563 - FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE); 1513 + FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); 1564 1514 1565 1515 err = xenbus_grant_ring(dev, rxs, 1, &gref); 1566 1516 if (err < 0)
+2 -2
drivers/tty/hvc/hvc_xen.c
··· 230 230 if (r < 0 || v == 0) 231 231 goto err; 232 232 gfn = v; 233 - info->intf = xen_remap(gfn << PAGE_SHIFT, PAGE_SIZE); 233 + info->intf = xen_remap(gfn << XEN_PAGE_SHIFT, XEN_PAGE_SIZE); 234 234 if (info->intf == NULL) 235 235 goto err; 236 236 info->vtermno = HVC_COOKIE; ··· 472 472 struct xencons_info *info = dev_get_drvdata(&dev->dev); 473 473 474 474 xencons_disconnect_backend(info); 475 - memset(info->intf, 0, PAGE_SIZE); 475 + memset(info->intf, 0, XEN_PAGE_SIZE); 476 476 return xencons_connect_backend(dev, info); 477 477 } 478 478
-2
drivers/xen/Makefile
··· 1 - ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),) 2 1 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o 3 - endif 4 2 obj-$(CONFIG_X86) += fallback.o 5 3 obj-y += grant-table.o features.o balloon.o manage.o preempt.o 6 4 obj-y += events/
+225 -118
drivers/xen/balloon.c
··· 54 54 #include <linux/memory.h> 55 55 #include <linux/memory_hotplug.h> 56 56 #include <linux/percpu-defs.h> 57 + #include <linux/slab.h> 58 + #include <linux/sysctl.h> 57 59 58 60 #include <asm/page.h> 59 61 #include <asm/pgalloc.h> ··· 72 70 #include <xen/features.h> 73 71 #include <xen/page.h> 74 72 73 + static int xen_hotplug_unpopulated; 74 + 75 + #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 76 + 77 + static int zero; 78 + static int one = 1; 79 + 80 + static struct ctl_table balloon_table[] = { 81 + { 82 + .procname = "hotplug_unpopulated", 83 + .data = &xen_hotplug_unpopulated, 84 + .maxlen = sizeof(int), 85 + .mode = 0644, 86 + .proc_handler = proc_dointvec_minmax, 87 + .extra1 = &zero, 88 + .extra2 = &one, 89 + }, 90 + { } 91 + }; 92 + 93 + static struct ctl_table balloon_root[] = { 94 + { 95 + .procname = "balloon", 96 + .mode = 0555, 97 + .child = balloon_table, 98 + }, 99 + { } 100 + }; 101 + 102 + static struct ctl_table xen_root[] = { 103 + { 104 + .procname = "xen", 105 + .mode = 0555, 106 + .child = balloon_root, 107 + }, 108 + { } 109 + }; 110 + 111 + #endif 112 + 113 + /* 114 + * Use one extent per PAGE_SIZE to avoid to break down the page into 115 + * multiple frame. 116 + */ 117 + #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) 118 + 75 119 /* 76 120 * balloon_process() state: 77 121 * 78 122 * BP_DONE: done or nothing to do, 123 + * BP_WAIT: wait to be rescheduled, 79 124 * BP_EAGAIN: error, go to sleep, 80 125 * BP_ECANCELED: error, balloon operation canceled. 81 126 */ 82 127 83 128 enum bp_state { 84 129 BP_DONE, 130 + BP_WAIT, 85 131 BP_EAGAIN, 86 132 BP_ECANCELED 87 133 }; ··· 141 91 EXPORT_SYMBOL_GPL(balloon_stats); 142 92 143 93 /* We increase/decrease in batches which fit in a page */ 144 - static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; 94 + static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; 145 95 146 96 147 97 /* List of ballooned pages, threaded through the mem_map array. 
*/ 148 98 static LIST_HEAD(ballooned_pages); 99 + static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); 149 100 150 101 /* Main work function, always executed in process context. */ 151 102 static void balloon_process(struct work_struct *work); ··· 175 124 list_add(&page->lru, &ballooned_pages); 176 125 balloon_stats.balloon_low++; 177 126 } 127 + wake_up(&balloon_wq); 178 128 } 179 129 180 130 static void balloon_append(struct page *page) ··· 185 133 } 186 134 187 135 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 188 - static struct page *balloon_retrieve(bool prefer_highmem) 136 + static struct page *balloon_retrieve(bool require_lowmem) 189 137 { 190 138 struct page *page; 191 139 192 140 if (list_empty(&ballooned_pages)) 193 141 return NULL; 194 142 195 - if (prefer_highmem) 196 - page = list_entry(ballooned_pages.prev, struct page, lru); 197 - else 198 - page = list_entry(ballooned_pages.next, struct page, lru); 143 + page = list_entry(ballooned_pages.next, struct page, lru); 144 + if (require_lowmem && PageHighMem(page)) 145 + return NULL; 199 146 list_del(&page->lru); 200 147 201 148 if (PageHighMem(page)) ··· 217 166 218 167 static enum bp_state update_schedule(enum bp_state state) 219 168 { 169 + if (state == BP_WAIT) 170 + return BP_WAIT; 171 + 220 172 if (state == BP_ECANCELED) 221 173 return BP_ECANCELED; 222 174 ··· 247 193 } 248 194 249 195 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 250 - static long current_credit(void) 196 + static struct resource *additional_memory_resource(phys_addr_t size) 251 197 { 252 - return balloon_stats.target_pages - balloon_stats.current_pages - 253 - balloon_stats.hotplug_pages; 198 + struct resource *res; 199 + int ret; 200 + 201 + res = kzalloc(sizeof(*res), GFP_KERNEL); 202 + if (!res) 203 + return NULL; 204 + 205 + res->name = "System RAM"; 206 + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 207 + 208 + ret = allocate_resource(&iomem_resource, res, 209 + size, 0, -1, 210 + PAGES_PER_SECTION * 
PAGE_SIZE, NULL, NULL); 211 + if (ret < 0) { 212 + pr_err("Cannot allocate new System RAM resource\n"); 213 + kfree(res); 214 + return NULL; 215 + } 216 + 217 + return res; 254 218 } 255 219 256 - static bool balloon_is_inflated(void) 220 + static void release_memory_resource(struct resource *resource) 257 221 { 258 - if (balloon_stats.balloon_low || balloon_stats.balloon_high || 259 - balloon_stats.balloon_hotplug) 260 - return true; 261 - else 262 - return false; 222 + if (!resource) 223 + return; 224 + 225 + /* 226 + * No need to reset region to identity mapped since we now 227 + * know that no I/O can be in this region 228 + */ 229 + release_resource(resource); 230 + kfree(resource); 263 231 } 264 232 265 - /* 266 - * reserve_additional_memory() adds memory region of size >= credit above 267 - * max_pfn. New region is section aligned and size is modified to be multiple 268 - * of section size. Those features allow optimal use of address space and 269 - * establish proper alignment when this function is called first time after 270 - * boot (last section not fully populated at boot time contains unused memory 271 - * pages with PG_reserved bit not set; online_pages_range() does not allow page 272 - * onlining in whole range if first onlined page does not have PG_reserved 273 - * bit set). Real size of added memory is established at page onlining stage. 
274 - */ 275 - 276 - static enum bp_state reserve_additional_memory(long credit) 233 + static enum bp_state reserve_additional_memory(void) 277 234 { 235 + long credit; 236 + struct resource *resource; 278 237 int nid, rc; 279 - u64 hotplug_start_paddr; 280 - unsigned long balloon_hotplug = credit; 238 + unsigned long balloon_hotplug; 281 239 282 - hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); 283 - balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); 284 - nid = memory_add_physaddr_to_nid(hotplug_start_paddr); 240 + credit = balloon_stats.target_pages + balloon_stats.target_unpopulated 241 + - balloon_stats.total_pages; 242 + 243 + /* 244 + * Already hotplugged enough pages? Wait for them to be 245 + * onlined. 246 + */ 247 + if (credit <= 0) 248 + return BP_WAIT; 249 + 250 + balloon_hotplug = round_up(credit, PAGES_PER_SECTION); 251 + 252 + resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE); 253 + if (!resource) 254 + goto err; 255 + 256 + nid = memory_add_physaddr_to_nid(resource->start); 285 257 286 258 #ifdef CONFIG_XEN_HAVE_PVMMU 259 + /* 260 + * We don't support PV MMU when Linux and Xen is using 261 + * different page granularity. 
262 + */ 263 + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); 264 + 287 265 /* 288 266 * add_memory() will build page tables for the new memory so 289 267 * the p2m must contain invalid entries so the correct ··· 328 242 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 329 243 unsigned long pfn, i; 330 244 331 - pfn = PFN_DOWN(hotplug_start_paddr); 245 + pfn = PFN_DOWN(resource->start); 332 246 for (i = 0; i < balloon_hotplug; i++) { 333 247 if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { 334 248 pr_warn("set_phys_to_machine() failed, no memory added\n"); 335 - return BP_ECANCELED; 249 + goto err; 336 250 } 337 251 } 338 252 } 339 253 #endif 340 254 341 - rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); 342 - 255 + rc = add_memory_resource(nid, resource); 343 256 if (rc) { 344 257 pr_warn("Cannot add additional memory (%i)\n", rc); 345 - return BP_ECANCELED; 258 + goto err; 346 259 } 347 260 348 - balloon_hotplug -= credit; 261 + balloon_stats.total_pages += balloon_hotplug; 349 262 350 - balloon_stats.hotplug_pages += credit; 351 - balloon_stats.balloon_hotplug = balloon_hotplug; 352 - 353 - return BP_DONE; 263 + return BP_WAIT; 264 + err: 265 + release_memory_resource(resource); 266 + return BP_ECANCELED; 354 267 } 355 268 356 269 static void xen_online_page(struct page *page) ··· 359 274 mutex_lock(&balloon_mutex); 360 275 361 276 __balloon_append(page); 362 - 363 - if (balloon_stats.hotplug_pages) 364 - --balloon_stats.hotplug_pages; 365 - else 366 - --balloon_stats.balloon_hotplug; 367 277 368 278 mutex_unlock(&balloon_mutex); 369 279 } ··· 376 296 .priority = 0 377 297 }; 378 298 #else 299 + static enum bp_state reserve_additional_memory(void) 300 + { 301 + balloon_stats.target_pages = balloon_stats.current_pages; 302 + return BP_ECANCELED; 303 + } 304 + #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ 305 + 379 306 static long current_credit(void) 380 307 { 381 - unsigned long target = balloon_stats.target_pages; 382 - 383 - 
target = min(target, 384 - balloon_stats.current_pages + 385 - balloon_stats.balloon_low + 386 - balloon_stats.balloon_high); 387 - 388 - return target - balloon_stats.current_pages; 308 + return balloon_stats.target_pages - balloon_stats.current_pages; 389 309 } 390 310 391 311 static bool balloon_is_inflated(void) 392 312 { 393 - if (balloon_stats.balloon_low || balloon_stats.balloon_high) 394 - return true; 395 - else 396 - return false; 313 + return balloon_stats.balloon_low || balloon_stats.balloon_high; 397 314 } 398 - 399 - static enum bp_state reserve_additional_memory(long credit) 400 - { 401 - balloon_stats.target_pages = balloon_stats.current_pages; 402 - return BP_DONE; 403 - } 404 - #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ 405 315 406 316 static enum bp_state increase_reservation(unsigned long nr_pages) 407 317 { 408 318 int rc; 409 - unsigned long pfn, i; 319 + unsigned long i; 410 320 struct page *page; 411 321 struct xen_memory_reservation reservation = { 412 322 .address_bits = 0, 413 - .extent_order = 0, 323 + .extent_order = EXTENT_ORDER, 414 324 .domid = DOMID_SELF 415 325 }; 416 - 417 - #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 418 - if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { 419 - nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); 420 - balloon_stats.hotplug_pages += nr_pages; 421 - balloon_stats.balloon_hotplug -= nr_pages; 422 - return BP_DONE; 423 - } 424 - #endif 425 326 426 327 if (nr_pages > ARRAY_SIZE(frame_list)) 427 328 nr_pages = ARRAY_SIZE(frame_list); ··· 413 352 nr_pages = i; 414 353 break; 415 354 } 416 - frame_list[i] = page_to_pfn(page); 355 + 356 + /* XENMEM_populate_physmap requires a PFN based on Xen 357 + * granularity. 
358 + */ 359 + frame_list[i] = page_to_xen_pfn(page); 417 360 page = balloon_next_page(page); 418 361 } 419 362 ··· 431 366 page = balloon_retrieve(false); 432 367 BUG_ON(page == NULL); 433 368 434 - pfn = page_to_pfn(page); 435 - 436 369 #ifdef CONFIG_XEN_HAVE_PVMMU 370 + /* 371 + * We don't support PV MMU when Linux and Xen is using 372 + * different page granularity. 373 + */ 374 + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); 375 + 437 376 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 377 + unsigned long pfn = page_to_pfn(page); 378 + 438 379 set_phys_to_machine(pfn, frame_list[i]); 439 380 440 381 /* Link back into the page tables if not highmem. */ ··· 467 396 static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) 468 397 { 469 398 enum bp_state state = BP_DONE; 470 - unsigned long pfn, i; 471 - struct page *page; 399 + unsigned long i; 400 + struct page *page, *tmp; 472 401 int ret; 473 402 struct xen_memory_reservation reservation = { 474 403 .address_bits = 0, 475 - .extent_order = 0, 404 + .extent_order = EXTENT_ORDER, 476 405 .domid = DOMID_SELF 477 406 }; 478 - 479 - #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 480 - if (balloon_stats.hotplug_pages) { 481 - nr_pages = min(nr_pages, balloon_stats.hotplug_pages); 482 - balloon_stats.hotplug_pages -= nr_pages; 483 - balloon_stats.balloon_hotplug += nr_pages; 484 - return BP_DONE; 485 - } 486 - #endif 407 + LIST_HEAD(pages); 487 408 488 409 if (nr_pages > ARRAY_SIZE(frame_list)) 489 410 nr_pages = ARRAY_SIZE(frame_list); ··· 488 425 break; 489 426 } 490 427 scrub_page(page); 491 - 492 - frame_list[i] = page_to_pfn(page); 428 + list_add(&page->lru, &pages); 493 429 } 494 430 495 431 /* ··· 500 438 */ 501 439 kmap_flush_unused(); 502 440 503 - /* Update direct mapping, invalidate P2M, and add to balloon. 
*/ 504 - for (i = 0; i < nr_pages; i++) { 505 - pfn = frame_list[i]; 506 - frame_list[i] = pfn_to_gfn(pfn); 507 - page = pfn_to_page(pfn); 441 + /* 442 + * Setup the frame, update direct mapping, invalidate P2M, 443 + * and add to balloon. 444 + */ 445 + i = 0; 446 + list_for_each_entry_safe(page, tmp, &pages, lru) { 447 + /* XENMEM_decrease_reservation requires a GFN */ 448 + frame_list[i++] = xen_page_to_gfn(page); 508 449 509 450 #ifdef CONFIG_XEN_HAVE_PVMMU 451 + /* 452 + * We don't support PV MMU when Linux and Xen is using 453 + * different page granularity. 454 + */ 455 + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); 456 + 510 457 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 458 + unsigned long pfn = page_to_pfn(page); 459 + 511 460 if (!PageHighMem(page)) { 512 461 ret = HYPERVISOR_update_va_mapping( 513 462 (unsigned long)__va(pfn << PAGE_SHIFT), ··· 528 455 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 529 456 } 530 457 #endif 458 + list_del(&page->lru); 531 459 532 460 balloon_append(page); 533 461 } ··· 566 492 if (balloon_is_inflated()) 567 493 state = increase_reservation(credit); 568 494 else 569 - state = reserve_additional_memory(credit); 495 + state = reserve_additional_memory(); 570 496 } 571 497 572 498 if (credit < 0) ··· 594 520 } 595 521 EXPORT_SYMBOL_GPL(balloon_set_new_target); 596 522 523 + static int add_ballooned_pages(int nr_pages) 524 + { 525 + enum bp_state st; 526 + 527 + if (xen_hotplug_unpopulated) { 528 + st = reserve_additional_memory(); 529 + if (st != BP_ECANCELED) { 530 + mutex_unlock(&balloon_mutex); 531 + wait_event(balloon_wq, 532 + !list_empty(&ballooned_pages)); 533 + mutex_lock(&balloon_mutex); 534 + return 0; 535 + } 536 + } 537 + 538 + st = decrease_reservation(nr_pages, GFP_USER); 539 + if (st != BP_DONE) 540 + return -ENOMEM; 541 + 542 + return 0; 543 + } 544 + 597 545 /** 598 546 * alloc_xenballooned_pages - get pages that have been ballooned out 599 547 * @nr_pages: Number of pages to get 600 548 * @pages: 
pages returned 601 - * @highmem: allow highmem pages 602 549 * @return 0 on success, error otherwise 603 550 */ 604 - int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) 551 + int alloc_xenballooned_pages(int nr_pages, struct page **pages) 605 552 { 606 553 int pgno = 0; 607 554 struct page *page; 555 + int ret; 556 + 608 557 mutex_lock(&balloon_mutex); 558 + 559 + balloon_stats.target_unpopulated += nr_pages; 560 + 609 561 while (pgno < nr_pages) { 610 - page = balloon_retrieve(highmem); 611 - if (page && (highmem || !PageHighMem(page))) { 562 + page = balloon_retrieve(true); 563 + if (page) { 612 564 pages[pgno++] = page; 565 + #ifdef CONFIG_XEN_HAVE_PVMMU 566 + /* 567 + * We don't support PV MMU when Linux and Xen is using 568 + * different page granularity. 569 + */ 570 + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); 571 + 572 + ret = xen_alloc_p2m_entry(page_to_pfn(page)); 573 + if (ret < 0) 574 + goto out_undo; 575 + #endif 613 576 } else { 614 - enum bp_state st; 615 - if (page) 616 - balloon_append(page); 617 - st = decrease_reservation(nr_pages - pgno, 618 - highmem ? GFP_HIGHUSER : GFP_USER); 619 - if (st != BP_DONE) 577 + ret = add_ballooned_pages(nr_pages - pgno); 578 + if (ret < 0) 620 579 goto out_undo; 621 580 } 622 581 } 623 582 mutex_unlock(&balloon_mutex); 624 583 return 0; 625 584 out_undo: 626 - while (pgno) 627 - balloon_append(pages[--pgno]); 628 - /* Free the memory back to the kernel soon */ 629 - schedule_delayed_work(&balloon_worker, 0); 630 585 mutex_unlock(&balloon_mutex); 631 - return -ENOMEM; 586 + free_xenballooned_pages(pgno, pages); 587 + return ret; 632 588 } 633 589 EXPORT_SYMBOL(alloc_xenballooned_pages); 634 590 ··· 677 573 if (pages[i]) 678 574 balloon_append(pages[i]); 679 575 } 576 + 577 + balloon_stats.target_unpopulated -= nr_pages; 680 578 681 579 /* The balloon may be too large now. Shrink it if needed. */ 682 580 if (current_credit()) ··· 708 602 don't subtract from it. 
*/ 709 603 __balloon_append(page); 710 604 } 605 + 606 + balloon_stats.total_pages += extra_pfn_end - start_pfn; 711 607 } 712 608 713 609 static int __init balloon_init(void) ··· 727 619 balloon_stats.target_pages = balloon_stats.current_pages; 728 620 balloon_stats.balloon_low = 0; 729 621 balloon_stats.balloon_high = 0; 622 + balloon_stats.total_pages = balloon_stats.current_pages; 730 623 731 624 balloon_stats.schedule_delay = 1; 732 625 balloon_stats.max_schedule_delay = 32; ··· 735 626 balloon_stats.max_retry_count = RETRY_UNLIMITED; 736 627 737 628 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 738 - balloon_stats.hotplug_pages = 0; 739 - balloon_stats.balloon_hotplug = 0; 740 - 741 629 set_online_page_callback(&xen_online_page); 742 630 register_memory_notifier(&xen_memory_nb); 631 + register_sysctl_table(xen_root); 743 632 #endif 744 633 745 634 /*
+8
drivers/xen/biomerge.c
··· 6 6 bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, 7 7 const struct bio_vec *vec2) 8 8 { 9 + #if XEN_PAGE_SIZE == PAGE_SIZE 9 10 unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); 10 11 unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); 11 12 12 13 return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && 13 14 ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); 15 + #else 16 + /* 17 + * XXX: Add support for merging bio_vec when using different page 18 + * size in Xen and Linux. 19 + */ 20 + return 0; 21 + #endif 14 22 } 15 23 EXPORT_SYMBOL(xen_biovec_phys_mergeable);
+11 -3
drivers/xen/cpu_hotplug.c
··· 11 11 static void enable_hotplug_cpu(int cpu) 12 12 { 13 13 if (!cpu_present(cpu)) 14 - arch_register_cpu(cpu); 14 + xen_arch_register_cpu(cpu); 15 15 16 16 set_cpu_present(cpu, true); 17 17 } 18 18 19 19 static void disable_hotplug_cpu(int cpu) 20 20 { 21 + if (cpu_online(cpu)) { 22 + lock_device_hotplug(); 23 + device_offline(get_cpu_device(cpu)); 24 + unlock_device_hotplug(); 25 + } 21 26 if (cpu_present(cpu)) 22 - arch_unregister_cpu(cpu); 27 + xen_arch_unregister_cpu(cpu); 23 28 24 29 set_cpu_present(cpu, false); 25 30 } ··· 60 55 enable_hotplug_cpu(cpu); 61 56 break; 62 57 case 0: 63 - (void)cpu_down(cpu); 64 58 disable_hotplug_cpu(cpu); 65 59 break; 66 60 default: ··· 106 102 static struct notifier_block xsn_cpu = { 107 103 .notifier_call = setup_cpu_watcher }; 108 104 105 + #ifdef CONFIG_X86 109 106 if (!xen_pv_domain()) 107 + #else 108 + if (!xen_domain()) 109 + #endif 110 110 return -ENODEV; 111 111 112 112 register_xenstore_notifier(&xsn_cpu);
+1 -1
drivers/xen/events/events_base.c
··· 40 40 #include <asm/idle.h> 41 41 #include <asm/io_apic.h> 42 42 #include <asm/xen/pci.h> 43 - #include <xen/page.h> 44 43 #endif 45 44 #include <asm/sync_bitops.h> 46 45 #include <asm/xen/hypercall.h> 47 46 #include <asm/xen/hypervisor.h> 47 + #include <xen/page.h> 48 48 49 49 #include <xen/xen.h> 50 50 #include <xen/hvm.h>
+1 -1
drivers/xen/events/events_fifo.c
··· 54 54 55 55 #include "events_internal.h" 56 56 57 - #define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) 57 + #define EVENT_WORDS_PER_PAGE (XEN_PAGE_SIZE / sizeof(event_word_t)) 58 58 #define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE) 59 59 60 60 struct evtchn_fifo_queue {
+52 -4
drivers/xen/grant-table.c
··· 642 642 if (xen_auto_xlat_grant_frames.count) 643 643 return -EINVAL; 644 644 645 - vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes); 645 + vaddr = xen_remap(addr, XEN_PAGE_SIZE * max_nr_gframes); 646 646 if (vaddr == NULL) { 647 647 pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n", 648 648 &addr); ··· 654 654 return -ENOMEM; 655 655 } 656 656 for (i = 0; i < max_nr_gframes; i++) 657 - pfn[i] = PFN_DOWN(addr) + i; 657 + pfn[i] = XEN_PFN_DOWN(addr) + i; 658 658 659 659 xen_auto_xlat_grant_frames.vaddr = vaddr; 660 660 xen_auto_xlat_grant_frames.pfn = pfn; ··· 687 687 int i; 688 688 int ret; 689 689 690 - ret = alloc_xenballooned_pages(nr_pages, pages, false); 690 + ret = alloc_xenballooned_pages(nr_pages, pages); 691 691 if (ret < 0) 692 692 return ret; 693 693 ··· 775 775 &op->status, __func__); 776 776 } 777 777 EXPORT_SYMBOL_GPL(gnttab_batch_copy); 778 + 779 + void gnttab_foreach_grant_in_range(struct page *page, 780 + unsigned int offset, 781 + unsigned int len, 782 + xen_grant_fn_t fn, 783 + void *data) 784 + { 785 + unsigned int goffset; 786 + unsigned int glen; 787 + unsigned long xen_pfn; 788 + 789 + len = min_t(unsigned int, PAGE_SIZE - offset, len); 790 + goffset = xen_offset_in_page(offset); 791 + 792 + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset); 793 + 794 + while (len) { 795 + glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len); 796 + fn(pfn_to_gfn(xen_pfn), goffset, glen, data); 797 + 798 + goffset = 0; 799 + xen_pfn++; 800 + len -= glen; 801 + } 802 + } 803 + EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range); 804 + 805 + void gnttab_foreach_grant(struct page **pages, 806 + unsigned int nr_grefs, 807 + xen_grant_fn_t fn, 808 + void *data) 809 + { 810 + unsigned int goffset = 0; 811 + unsigned long xen_pfn = 0; 812 + unsigned int i; 813 + 814 + for (i = 0; i < nr_grefs; i++) { 815 + if ((i % XEN_PFN_PER_PAGE) == 0) { 816 + xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); 817 + goffset = 0; 818 + } 819 + 820 + 
fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data); 821 + 822 + goffset += XEN_PAGE_SIZE; 823 + xen_pfn++; 824 + } 825 + } 778 826 779 827 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, 780 828 struct gnttab_map_grant_ref *kmap_ops, ··· 1026 978 { 1027 979 /* Only version 1 is used, which will always be available. */ 1028 980 grant_table_version = 1; 1029 - grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1); 981 + grefs_per_grant_frame = XEN_PAGE_SIZE / sizeof(struct grant_entry_v1); 1030 982 gnttab_interface = &gnttab_v1_ops; 1031 983 1032 984 pr_info("Grant tables using version %d layout\n", grant_table_version);
+6 -4
drivers/xen/privcmd.c
··· 401 401 if (pages == NULL) 402 402 return -ENOMEM; 403 403 404 - rc = alloc_xenballooned_pages(numpgs, pages, 0); 404 + rc = alloc_xenballooned_pages(numpgs, pages); 405 405 if (rc != 0) { 406 406 pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, 407 407 numpgs, rc); ··· 446 446 return -EINVAL; 447 447 } 448 448 449 - nr_pages = m.num; 449 + nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); 450 450 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) 451 451 return -EINVAL; 452 452 ··· 494 494 goto out_unlock; 495 495 } 496 496 if (xen_feature(XENFEAT_auto_translated_physmap)) { 497 - ret = alloc_empty_pages(vma, m.num); 497 + ret = alloc_empty_pages(vma, nr_pages); 498 498 if (ret < 0) 499 499 goto out_unlock; 500 500 } else ··· 518 518 state.global_error = 0; 519 519 state.version = version; 520 520 521 + BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); 521 522 /* mmap_batch_fn guarantees ret == 0 */ 522 523 BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), 523 524 &pagelist, mmap_batch_fn, &state)); ··· 583 582 { 584 583 struct page **pages = vma->vm_private_data; 585 584 int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 585 + int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; 586 586 int rc; 587 587 588 588 if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) 589 589 return; 590 590 591 - rc = xen_unmap_domain_gfn_range(vma, numpgs, pages); 591 + rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); 592 592 if (rc == 0) 593 593 free_xenballooned_pages(numpgs, pages); 594 594 else
+21 -22
drivers/xen/swiotlb-xen.c
··· 76 76 static u64 start_dma_addr; 77 77 78 78 /* 79 - * Both of these functions should avoid PFN_PHYS because phys_addr_t 79 + * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t 80 80 * can be 32bit when dma_addr_t is 64bit leading to a loss in 81 81 * information if the shift is done before casting to 64bit. 82 82 */ 83 83 static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr) 84 84 { 85 - unsigned long bfn = pfn_to_bfn(PFN_DOWN(paddr)); 86 - dma_addr_t dma = (dma_addr_t)bfn << PAGE_SHIFT; 85 + unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr)); 86 + dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT; 87 87 88 - dma |= paddr & ~PAGE_MASK; 88 + dma |= paddr & ~XEN_PAGE_MASK; 89 89 90 90 return dma; 91 91 } 92 92 93 93 static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr) 94 94 { 95 - unsigned long pfn = bfn_to_pfn(PFN_DOWN(baddr)); 96 - dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT; 95 + unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr)); 96 + dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT; 97 97 phys_addr_t paddr = dma; 98 98 99 - paddr |= baddr & ~PAGE_MASK; 99 + paddr |= baddr & ~XEN_PAGE_MASK; 100 100 101 101 return paddr; 102 102 } ··· 106 106 return xen_phys_to_bus(virt_to_phys(address)); 107 107 } 108 108 109 - static int check_pages_physically_contiguous(unsigned long pfn, 109 + static int check_pages_physically_contiguous(unsigned long xen_pfn, 110 110 unsigned int offset, 111 111 size_t length) 112 112 { ··· 114 114 int i; 115 115 int nr_pages; 116 116 117 - next_bfn = pfn_to_bfn(pfn); 118 - nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; 117 + next_bfn = pfn_to_bfn(xen_pfn); 118 + nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT; 119 119 120 120 for (i = 1; i < nr_pages; i++) { 121 - if (pfn_to_bfn(++pfn) != ++next_bfn) 121 + if (pfn_to_bfn(++xen_pfn) != ++next_bfn) 122 122 return 0; 123 123 } 124 124 return 1; ··· 126 126 127 127 static inline int 
range_straddles_page_boundary(phys_addr_t p, size_t size) 128 128 { 129 - unsigned long pfn = PFN_DOWN(p); 130 - unsigned int offset = p & ~PAGE_MASK; 129 + unsigned long xen_pfn = XEN_PFN_DOWN(p); 130 + unsigned int offset = p & ~XEN_PAGE_MASK; 131 131 132 - if (offset + size <= PAGE_SIZE) 132 + if (offset + size <= XEN_PAGE_SIZE) 133 133 return 0; 134 - if (check_pages_physically_contiguous(pfn, offset, size)) 134 + if (check_pages_physically_contiguous(xen_pfn, offset, size)) 135 135 return 0; 136 136 return 1; 137 137 } 138 138 139 139 static int is_xen_swiotlb_buffer(dma_addr_t dma_addr) 140 140 { 141 - unsigned long bfn = PFN_DOWN(dma_addr); 142 - unsigned long pfn = bfn_to_local_pfn(bfn); 143 - phys_addr_t paddr; 141 + unsigned long bfn = XEN_PFN_DOWN(dma_addr); 142 + unsigned long xen_pfn = bfn_to_local_pfn(bfn); 143 + phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn); 144 144 145 145 /* If the address is outside our domain, it CAN 146 146 * have the same virtual address as another address 147 147 * in our domain. Therefore _only_ check address within our domain. 
148 148 */ 149 - if (pfn_valid(pfn)) { 150 - paddr = PFN_PHYS(pfn); 149 + if (pfn_valid(PFN_DOWN(paddr))) { 151 150 return paddr >= virt_to_phys(xen_io_tlb_start) && 152 151 paddr < virt_to_phys(xen_io_tlb_end); 153 152 } ··· 391 392 */ 392 393 if (dma_capable(dev, dev_addr, size) && 393 394 !range_straddles_page_boundary(phys, size) && 394 - !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) && 395 + !xen_arch_need_swiotlb(dev, phys, dev_addr) && 395 396 !swiotlb_force) { 396 397 /* we are not interested in the dma_addr returned by 397 398 * xen_dma_map_page, only in the potential cache flushes executed ··· 550 551 dma_addr_t dev_addr = xen_phys_to_bus(paddr); 551 552 552 553 if (swiotlb_force || 553 - xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) || 554 + xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || 554 555 !dma_capable(hwdev, dev_addr, sg->length) || 555 556 range_straddles_page_boundary(paddr, sg->length)) { 556 557 phys_addr_t map = swiotlb_tbl_map_single(hwdev,
+87 -41
drivers/xen/xenbus/xenbus_client.c
··· 49 49 50 50 #include "xenbus_probe.h" 51 51 52 + #define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) 53 + 54 + #define XENBUS_MAX_RING_PAGES (XENBUS_PAGES(XENBUS_MAX_RING_GRANTS)) 55 + 52 56 struct xenbus_map_node { 53 57 struct list_head next; 54 58 union { ··· 61 57 } pv; 62 58 struct { 63 59 struct page *pages[XENBUS_MAX_RING_PAGES]; 60 + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; 64 61 void *addr; 65 62 } hvm; 66 63 }; 67 - grant_handle_t handles[XENBUS_MAX_RING_PAGES]; 64 + grant_handle_t handles[XENBUS_MAX_RING_GRANTS]; 68 65 unsigned int nr_handles; 69 66 }; 70 67 ··· 393 388 } 394 389 grefs[i] = err; 395 390 396 - vaddr = vaddr + PAGE_SIZE; 391 + vaddr = vaddr + XEN_PAGE_SIZE; 397 392 } 398 393 399 394 return 0; ··· 484 479 unsigned int flags, 485 480 bool *leaked) 486 481 { 487 - struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; 488 - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; 482 + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; 483 + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; 489 484 int i, j; 490 485 int err = GNTST_okay; 491 486 492 - if (nr_grefs > XENBUS_MAX_RING_PAGES) 487 + if (nr_grefs > XENBUS_MAX_RING_GRANTS) 493 488 return -EINVAL; 494 489 495 490 for (i = 0; i < nr_grefs; i++) { ··· 545 540 { 546 541 struct xenbus_map_node *node; 547 542 struct vm_struct *area; 548 - pte_t *ptes[XENBUS_MAX_RING_PAGES]; 549 - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; 543 + pte_t *ptes[XENBUS_MAX_RING_GRANTS]; 544 + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; 550 545 int err = GNTST_okay; 551 546 int i; 552 547 bool leaked; 553 548 554 549 *vaddr = NULL; 555 550 556 - if (nr_grefs > XENBUS_MAX_RING_PAGES) 551 + if (nr_grefs > XENBUS_MAX_RING_GRANTS) 557 552 return -EINVAL; 558 553 559 554 node = kzalloc(sizeof(*node), GFP_KERNEL); 560 555 if (!node) 561 556 return -ENOMEM; 562 557 563 - area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); 558 + area = alloc_vm_area(XEN_PAGE_SIZE * 
nr_grefs, ptes); 564 559 if (!area) { 565 560 kfree(node); 566 561 return -ENOMEM; ··· 596 591 return err; 597 592 } 598 593 594 + struct map_ring_valloc_hvm 595 + { 596 + unsigned int idx; 597 + 598 + /* Why do we need two arrays? See comment of __xenbus_map_ring */ 599 + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; 600 + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; 601 + }; 602 + 603 + static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, 604 + unsigned int goffset, 605 + unsigned int len, 606 + void *data) 607 + { 608 + struct map_ring_valloc_hvm *info = data; 609 + unsigned long vaddr = (unsigned long)gfn_to_virt(gfn); 610 + 611 + info->phys_addrs[info->idx] = vaddr; 612 + info->addrs[info->idx] = vaddr; 613 + 614 + info->idx++; 615 + } 616 + 599 617 static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, 600 618 grant_ref_t *gnt_ref, 601 619 unsigned int nr_grefs, 602 620 void **vaddr) 603 621 { 604 622 struct xenbus_map_node *node; 605 - int i; 606 623 int err; 607 624 void *addr; 608 625 bool leaked = false; 609 - /* Why do we need two arrays? 
See comment of __xenbus_map_ring */ 610 - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; 611 - unsigned long addrs[XENBUS_MAX_RING_PAGES]; 626 + struct map_ring_valloc_hvm info = { 627 + .idx = 0, 628 + }; 629 + unsigned int nr_pages = XENBUS_PAGES(nr_grefs); 612 630 613 - if (nr_grefs > XENBUS_MAX_RING_PAGES) 631 + if (nr_grefs > XENBUS_MAX_RING_GRANTS) 614 632 return -EINVAL; 615 633 616 634 *vaddr = NULL; ··· 642 614 if (!node) 643 615 return -ENOMEM; 644 616 645 - err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, 646 - false /* lowmem */); 617 + err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); 647 618 if (err) 648 619 goto out_err; 649 620 650 - for (i = 0; i < nr_grefs; i++) { 651 - unsigned long pfn = page_to_pfn(node->hvm.pages[i]); 652 - phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); 653 - addrs[i] = (unsigned long)pfn_to_kaddr(pfn); 654 - } 621 + gnttab_foreach_grant(node->hvm.pages, nr_grefs, 622 + xenbus_map_ring_setup_grant_hvm, 623 + &info); 655 624 656 625 err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, 657 - phys_addrs, GNTMAP_host_map, &leaked); 626 + info.phys_addrs, GNTMAP_host_map, &leaked); 658 627 node->nr_handles = nr_grefs; 659 628 660 629 if (err) 661 630 goto out_free_ballooned_pages; 662 631 663 - addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, 632 + addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP, 664 633 PAGE_KERNEL); 665 634 if (!addr) { 666 635 err = -ENOMEM; ··· 675 650 676 651 out_xenbus_unmap_ring: 677 652 if (!leaked) 678 - xenbus_unmap_ring(dev, node->handles, node->nr_handles, 679 - addrs); 653 + xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs); 680 654 else 681 655 pr_alert("leaking %p size %u page(s)", 682 - addr, nr_grefs); 656 + addr, nr_pages); 683 657 out_free_ballooned_pages: 684 658 if (!leaked) 685 - free_xenballooned_pages(nr_grefs, node->hvm.pages); 659 + free_xenballooned_pages(nr_pages, node->hvm.pages); 686 660 out_err: 687 661 kfree(node); 688 
662 return err; ··· 711 687 unsigned int nr_grefs, grant_handle_t *handles, 712 688 unsigned long *vaddrs, bool *leaked) 713 689 { 714 - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; 690 + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; 715 691 int i; 716 692 717 - if (nr_grefs > XENBUS_MAX_RING_PAGES) 693 + if (nr_grefs > XENBUS_MAX_RING_GRANTS) 718 694 return -EINVAL; 719 695 720 696 for (i = 0; i < nr_grefs; i++) ··· 747 723 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) 748 724 { 749 725 struct xenbus_map_node *node; 750 - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; 726 + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; 751 727 unsigned int level; 752 728 int i; 753 729 bool leaked = false; ··· 774 750 unsigned long addr; 775 751 776 752 memset(&unmap[i], 0, sizeof(unmap[i])); 777 - addr = (unsigned long)vaddr + (PAGE_SIZE * i); 753 + addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i); 778 754 unmap[i].host_addr = arbitrary_virt_to_machine( 779 755 lookup_address(addr, &level)).maddr; 780 756 unmap[i].dev_bus_addr = 0; ··· 807 783 return err; 808 784 } 809 785 786 + struct unmap_ring_vfree_hvm 787 + { 788 + unsigned int idx; 789 + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; 790 + }; 791 + 792 + static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn, 793 + unsigned int goffset, 794 + unsigned int len, 795 + void *data) 796 + { 797 + struct unmap_ring_vfree_hvm *info = data; 798 + 799 + info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn); 800 + 801 + info->idx++; 802 + } 803 + 810 804 static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) 811 805 { 812 806 int rv; 813 807 struct xenbus_map_node *node; 814 808 void *addr; 815 - unsigned long addrs[XENBUS_MAX_RING_PAGES]; 816 - int i; 809 + struct unmap_ring_vfree_hvm info = { 810 + .idx = 0, 811 + }; 812 + unsigned int nr_pages; 817 813 818 814 spin_lock(&xenbus_valloc_lock); 819 815 list_for_each_entry(node, 
&xenbus_valloc_pages, next) { ··· 853 809 return GNTST_bad_virt_addr; 854 810 } 855 811 856 - for (i = 0; i < node->nr_handles; i++) 857 - addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); 812 + nr_pages = XENBUS_PAGES(node->nr_handles); 813 + 814 + gnttab_foreach_grant(node->hvm.pages, node->nr_handles, 815 + xenbus_unmap_ring_setup_grant_hvm, 816 + &info); 858 817 859 818 rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, 860 - addrs); 819 + info.addrs); 861 820 if (!rv) { 862 821 vunmap(vaddr); 863 - free_xenballooned_pages(node->nr_handles, node->hvm.pages); 822 + free_xenballooned_pages(nr_pages, node->hvm.pages); 864 823 } 865 824 else 866 - WARN(1, "Leaking %p, size %u page(s)\n", vaddr, 867 - node->nr_handles); 825 + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); 868 826 869 827 kfree(node); 870 828 return rv; ··· 887 841 grant_handle_t *handles, unsigned int nr_handles, 888 842 unsigned long *vaddrs) 889 843 { 890 - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; 844 + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; 891 845 int i; 892 846 int err; 893 847 894 - if (nr_handles > XENBUS_MAX_RING_PAGES) 848 + if (nr_handles > XENBUS_MAX_RING_GRANTS) 895 849 return -EINVAL; 896 850 897 851 for (i = 0; i < nr_handles; i++)
+2 -1
drivers/xen/xenbus/xenbus_probe.c
··· 802 802 goto out_error; 803 803 xen_store_gfn = (unsigned long)v; 804 804 xen_store_interface = 805 - xen_remap(xen_store_gfn << PAGE_SHIFT, PAGE_SIZE); 805 + xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, 806 + XEN_PAGE_SIZE); 806 807 break; 807 808 default: 808 809 pr_warn("Xenstore state unknown\n");
+84 -40
drivers/xen/xlate_mmu.c
··· 38 38 #include <xen/interface/xen.h> 39 39 #include <xen/interface/memory.h> 40 40 41 - /* map fgfn of domid to lpfn in the current domain */ 42 - static int map_foreign_page(unsigned long lpfn, unsigned long fgfn, 43 - unsigned int domid) 41 + typedef void (*xen_gfn_fn_t)(unsigned long gfn, void *data); 42 + 43 + /* Break down the pages in 4KB chunk and call fn for each gfn */ 44 + static void xen_for_each_gfn(struct page **pages, unsigned nr_gfn, 45 + xen_gfn_fn_t fn, void *data) 44 46 { 45 - int rc; 46 - struct xen_add_to_physmap_range xatp = { 47 - .domid = DOMID_SELF, 48 - .foreign_domid = domid, 49 - .size = 1, 50 - .space = XENMAPSPACE_gmfn_foreign, 51 - }; 52 - xen_ulong_t idx = fgfn; 53 - xen_pfn_t gpfn = lpfn; 54 - int err = 0; 47 + unsigned long xen_pfn = 0; 48 + struct page *page; 49 + int i; 55 50 56 - set_xen_guest_handle(xatp.idxs, &idx); 57 - set_xen_guest_handle(xatp.gpfns, &gpfn); 58 - set_xen_guest_handle(xatp.errs, &err); 59 - 60 - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); 61 - return rc < 0 ? 
rc : err; 51 + for (i = 0; i < nr_gfn; i++) { 52 + if ((i % XEN_PFN_PER_PAGE) == 0) { 53 + page = pages[i / XEN_PFN_PER_PAGE]; 54 + xen_pfn = page_to_xen_pfn(page); 55 + } 56 + fn(pfn_to_gfn(xen_pfn++), data); 57 + } 62 58 } 63 59 64 60 struct remap_data { 65 61 xen_pfn_t *fgfn; /* foreign domain's gfn */ 62 + int nr_fgfn; /* Number of foreign gfn left to map */ 66 63 pgprot_t prot; 67 64 domid_t domid; 68 65 struct vm_area_struct *vma; ··· 68 71 struct xen_remap_gfn_info *info; 69 72 int *err_ptr; 70 73 int mapped; 74 + 75 + /* Hypercall parameters */ 76 + int h_errs[XEN_PFN_PER_PAGE]; 77 + xen_ulong_t h_idxs[XEN_PFN_PER_PAGE]; 78 + xen_pfn_t h_gpfns[XEN_PFN_PER_PAGE]; 79 + 80 + int h_iter; /* Iterator */ 71 81 }; 82 + 83 + static void setup_hparams(unsigned long gfn, void *data) 84 + { 85 + struct remap_data *info = data; 86 + 87 + info->h_idxs[info->h_iter] = *info->fgfn; 88 + info->h_gpfns[info->h_iter] = gfn; 89 + info->h_errs[info->h_iter] = 0; 90 + 91 + info->h_iter++; 92 + info->fgfn++; 93 + } 72 94 73 95 static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, 74 96 void *data) 75 97 { 76 98 struct remap_data *info = data; 77 99 struct page *page = info->pages[info->index++]; 78 - unsigned long pfn = page_to_pfn(page); 79 - pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); 80 - int rc; 100 + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), info->prot)); 101 + int rc, nr_gfn; 102 + uint32_t i; 103 + struct xen_add_to_physmap_range xatp = { 104 + .domid = DOMID_SELF, 105 + .foreign_domid = info->domid, 106 + .space = XENMAPSPACE_gmfn_foreign, 107 + }; 81 108 82 - rc = map_foreign_page(pfn, *info->fgfn, info->domid); 83 - *info->err_ptr++ = rc; 84 - if (!rc) { 85 - set_pte_at(info->vma->vm_mm, addr, ptep, pte); 86 - info->mapped++; 109 + nr_gfn = min_t(typeof(info->nr_fgfn), XEN_PFN_PER_PAGE, info->nr_fgfn); 110 + info->nr_fgfn -= nr_gfn; 111 + 112 + info->h_iter = 0; 113 + xen_for_each_gfn(&page, nr_gfn, setup_hparams, info); 114 + 
BUG_ON(info->h_iter != nr_gfn); 115 + 116 + set_xen_guest_handle(xatp.idxs, info->h_idxs); 117 + set_xen_guest_handle(xatp.gpfns, info->h_gpfns); 118 + set_xen_guest_handle(xatp.errs, info->h_errs); 119 + xatp.size = nr_gfn; 120 + 121 + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); 122 + 123 + /* info->err_ptr expect to have one error status per Xen PFN */ 124 + for (i = 0; i < nr_gfn; i++) { 125 + int err = (rc < 0) ? rc : info->h_errs[i]; 126 + 127 + *(info->err_ptr++) = err; 128 + if (!err) 129 + info->mapped++; 87 130 } 88 - info->fgfn++; 131 + 132 + /* 133 + * Note: The hypercall will return 0 in most of the case if even if 134 + * all the fgmfn are not mapped. We still have to update the pte 135 + * as the userspace may decide to continue. 136 + */ 137 + if (!rc) 138 + set_pte_at(info->vma->vm_mm, addr, ptep, pte); 89 139 90 140 return 0; 91 141 } ··· 146 102 { 147 103 int err; 148 104 struct remap_data data; 149 - unsigned long range = nr << PAGE_SHIFT; 105 + unsigned long range = DIV_ROUND_UP(nr, XEN_PFN_PER_PAGE) << PAGE_SHIFT; 150 106 151 107 /* Kept here for the purpose of making sure code doesn't break 152 108 x86 PVOPS */ 153 109 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); 154 110 155 111 data.fgfn = gfn; 112 + data.nr_fgfn = nr; 156 113 data.prot = prot; 157 114 data.domid = domid; 158 115 data.vma = vma; ··· 168 123 } 169 124 EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array); 170 125 126 + static void unmap_gfn(unsigned long gfn, void *data) 127 + { 128 + struct xen_remove_from_physmap xrp; 129 + 130 + xrp.domid = DOMID_SELF; 131 + xrp.gpfn = gfn; 132 + (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); 133 + } 134 + 171 135 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, 172 136 int nr, struct page **pages) 173 137 { 174 - int i; 138 + xen_for_each_gfn(pages, nr, unmap_gfn, NULL); 175 139 176 - for (i = 0; i < nr; i++) { 177 - struct xen_remove_from_physmap xrp; 178 - unsigned long pfn; 
179 - 180 - pfn = page_to_pfn(pages[i]); 181 - 182 - xrp.domid = DOMID_SELF; 183 - xrp.gpfn = pfn; 184 - (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); 185 - } 186 140 return 0; 187 141 } 188 142 EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range);
+2
include/linux/memory_hotplug.h
··· 11 11 struct pglist_data; 12 12 struct mem_section; 13 13 struct memory_block; 14 + struct resource; 14 15 15 16 #ifdef CONFIG_MEMORY_HOTPLUG 16 17 ··· 267 266 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, 268 267 void *arg, int (*func)(struct memory_block *, void *)); 269 268 extern int add_memory(int nid, u64 start, u64 size); 269 + extern int add_memory_resource(int nid, struct resource *resource); 270 270 extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, 271 271 bool for_device); 272 272 extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
+12 -10
include/uapi/xen/gntalloc.h
··· 11 11 #ifndef __LINUX_PUBLIC_GNTALLOC_H__ 12 12 #define __LINUX_PUBLIC_GNTALLOC_H__ 13 13 14 + #include <linux/types.h> 15 + 14 16 /* 15 17 * Allocates a new page and creates a new grant reference. 16 18 */ ··· 21 19 struct ioctl_gntalloc_alloc_gref { 22 20 /* IN parameters */ 23 21 /* The ID of the domain to be given access to the grants. */ 24 - uint16_t domid; 22 + __u16 domid; 25 23 /* Flags for this mapping */ 26 - uint16_t flags; 24 + __u16 flags; 27 25 /* Number of pages to map */ 28 - uint32_t count; 26 + __u32 count; 29 27 /* OUT parameters */ 30 28 /* The offset to be used on a subsequent call to mmap(). */ 31 - uint64_t index; 29 + __u64 index; 32 30 /* The grant references of the newly created grant, one per page */ 33 31 /* Variable size, depending on count */ 34 - uint32_t gref_ids[1]; 32 + __u32 gref_ids[1]; 35 33 }; 36 34 37 35 #define GNTALLOC_FLAG_WRITABLE 1 ··· 45 43 struct ioctl_gntalloc_dealloc_gref { 46 44 /* IN parameters */ 47 45 /* The offset returned in the map operation */ 48 - uint64_t index; 46 + __u64 index; 49 47 /* Number of references to unmap */ 50 - uint32_t count; 48 + __u32 count; 51 49 }; 52 50 53 51 /* ··· 69 67 * be cleared. Otherwise, it can be any byte in the page whose 70 68 * notification we are adjusting. 71 69 */ 72 - uint64_t index; 70 + __u64 index; 73 71 /* Action(s) to take on unmap */ 74 - uint32_t action; 72 + __u32 action; 75 73 /* Event channel to notify */ 76 - uint32_t event_channel_port; 74 + __u32 event_channel_port; 77 75 }; 78 76 79 77 /* Clear (set to zero) the byte specified by index */
+18 -16
include/uapi/xen/gntdev.h
··· 33 33 #ifndef __LINUX_PUBLIC_GNTDEV_H__ 34 34 #define __LINUX_PUBLIC_GNTDEV_H__ 35 35 36 + #include <linux/types.h> 37 + 36 38 struct ioctl_gntdev_grant_ref { 37 39 /* The domain ID of the grant to be mapped. */ 38 - uint32_t domid; 40 + __u32 domid; 39 41 /* The grant reference of the grant to be mapped. */ 40 - uint32_t ref; 42 + __u32 ref; 41 43 }; 42 44 43 45 /* ··· 52 50 struct ioctl_gntdev_map_grant_ref { 53 51 /* IN parameters */ 54 52 /* The number of grants to be mapped. */ 55 - uint32_t count; 56 - uint32_t pad; 53 + __u32 count; 54 + __u32 pad; 57 55 /* OUT parameters */ 58 56 /* The offset to be used on a subsequent call to mmap(). */ 59 - uint64_t index; 57 + __u64 index; 60 58 /* Variable IN parameter. */ 61 59 /* Array of grant references, of size @count. */ 62 60 struct ioctl_gntdev_grant_ref refs[1]; ··· 72 70 struct ioctl_gntdev_unmap_grant_ref { 73 71 /* IN parameters */ 74 72 /* The offset was returned by the corresponding map operation. */ 75 - uint64_t index; 73 + __u64 index; 76 74 /* The number of pages to be unmapped. */ 77 - uint32_t count; 78 - uint32_t pad; 75 + __u32 count; 76 + __u32 pad; 79 77 }; 80 78 81 79 /* ··· 95 93 struct ioctl_gntdev_get_offset_for_vaddr { 96 94 /* IN parameters */ 97 95 /* The virtual address of the first mapped page in a range. */ 98 - uint64_t vaddr; 96 + __u64 vaddr; 99 97 /* OUT parameters */ 100 98 /* The offset that was used in the initial mmap() operation. */ 101 - uint64_t offset; 99 + __u64 offset; 102 100 /* The number of pages mapped in the VM area that begins at @vaddr. */ 103 - uint32_t count; 104 - uint32_t pad; 101 + __u32 count; 102 + __u32 pad; 105 103 }; 106 104 107 105 /* ··· 115 113 struct ioctl_gntdev_set_max_grants { 116 114 /* IN parameter */ 117 115 /* The maximum number of grants that may be mapped at once. */ 118 - uint32_t count; 116 + __u32 count; 119 117 }; 120 118 121 119 /* ··· 137 135 * be cleared. 
Otherwise, it can be any byte in the page whose 138 136 * notification we are adjusting. 139 137 */ 140 - uint64_t index; 138 + __u64 index; 141 139 /* Action(s) to take on unmap */ 142 - uint32_t action; 140 + __u32 action; 143 141 /* Event channel to notify */ 144 - uint32_t event_channel_port; 142 + __u32 event_channel_port; 145 143 }; 146 144 147 145 /* Clear (set to zero) the byte specified by index */
+3 -9
include/xen/balloon.h
··· 8 8 /* We aim for 'current allocation' == 'target allocation'. */ 9 9 unsigned long current_pages; 10 10 unsigned long target_pages; 11 + unsigned long target_unpopulated; 11 12 /* Number of pages in high- and low-memory balloons. */ 12 13 unsigned long balloon_low; 13 14 unsigned long balloon_high; 15 + unsigned long total_pages; 14 16 unsigned long schedule_delay; 15 17 unsigned long max_schedule_delay; 16 18 unsigned long retry_count; 17 19 unsigned long max_retry_count; 18 - #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG 19 - unsigned long hotplug_pages; 20 - unsigned long balloon_hotplug; 21 - #endif 22 20 }; 23 21 24 22 extern struct balloon_stats balloon_stats; 25 23 26 24 void balloon_set_new_target(unsigned long target); 27 25 28 - int alloc_xenballooned_pages(int nr_pages, struct page **pages, 29 - bool highmem); 26 + int alloc_xenballooned_pages(int nr_pages, struct page **pages); 30 27 void free_xenballooned_pages(int nr_pages, struct page **pages); 31 - 32 - struct page *get_balloon_scratch_page(void); 33 - void put_balloon_scratch_page(void); 34 28 35 29 struct device; 36 30 #ifdef CONFIG_XEN_SELFBALLOONING
+57
include/xen/grant_table.h
··· 45 45 #include <asm/xen/hypervisor.h> 46 46 47 47 #include <xen/features.h> 48 + #include <xen/page.h> 48 49 #include <linux/mm_types.h> 49 50 #include <linux/page-flags.h> 51 + #include <linux/kernel.h> 50 52 51 53 #define GNTTAB_RESERVED_XENSTORE 1 52 54 ··· 130 128 131 129 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 132 130 unsigned long frame, int readonly); 131 + 132 + /* Give access to the first 4K of the page */ 133 + static inline void gnttab_page_grant_foreign_access_ref_one( 134 + grant_ref_t ref, domid_t domid, 135 + struct page *page, int readonly) 136 + { 137 + gnttab_grant_foreign_access_ref(ref, domid, xen_page_to_gfn(page), 138 + readonly); 139 + } 133 140 134 141 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, 135 142 unsigned long pfn); ··· 233 222 BUILD_BUG_ON(sizeof(struct xen_page_foreign) > BITS_PER_LONG); 234 223 return (struct xen_page_foreign *)&page->private; 235 224 #endif 225 + } 226 + 227 + /* Split Linux page in chunk of the size of the grant and call fn 228 + * 229 + * Parameters of fn: 230 + * gfn: guest frame number 231 + * offset: offset in the grant 232 + * len: length of the data in the grant. 
233 + * data: internal information 234 + */ 235 + typedef void (*xen_grant_fn_t)(unsigned long gfn, unsigned int offset, 236 + unsigned int len, void *data); 237 + 238 + void gnttab_foreach_grant_in_range(struct page *page, 239 + unsigned int offset, 240 + unsigned int len, 241 + xen_grant_fn_t fn, 242 + void *data); 243 + 244 + /* Helper to get to call fn only on the first "grant chunk" */ 245 + static inline void gnttab_for_one_grant(struct page *page, unsigned int offset, 246 + unsigned len, xen_grant_fn_t fn, 247 + void *data) 248 + { 249 + /* The first request is limited to the size of one grant */ 250 + len = min_t(unsigned int, XEN_PAGE_SIZE - (offset & ~XEN_PAGE_MASK), 251 + len); 252 + 253 + gnttab_foreach_grant_in_range(page, offset, len, fn, data); 254 + } 255 + 256 + /* Get @nr_grefs grants from an array of page and call fn for each grant */ 257 + void gnttab_foreach_grant(struct page **pages, 258 + unsigned int nr_grefs, 259 + xen_grant_fn_t fn, 260 + void *data); 261 + 262 + /* Get the number of grant in a specified region 263 + * 264 + * start: Offset from the beginning of the first page 265 + * len: total length of data (can cross multiple page) 266 + */ 267 + static inline unsigned int gnttab_count_grant(unsigned int start, 268 + unsigned int len) 269 + { 270 + return XEN_PFN_UP(xen_offset_in_page(start) + len); 236 271 } 237 272 238 273 #endif /* __ASM_GNTTAB_H__ */
+26 -1
include/xen/page.h
··· 1 1 #ifndef _XEN_PAGE_H 2 2 #define _XEN_PAGE_H 3 3 4 + #include <asm/page.h> 5 + 6 + /* The hypercall interface supports only 4KB page */ 7 + #define XEN_PAGE_SHIFT 12 8 + #define XEN_PAGE_SIZE (_AC(1, UL) << XEN_PAGE_SHIFT) 9 + #define XEN_PAGE_MASK (~(XEN_PAGE_SIZE-1)) 10 + #define xen_offset_in_page(p) ((unsigned long)(p) & ~XEN_PAGE_MASK) 11 + 12 + /* 13 + * We assume that PAGE_SIZE is a multiple of XEN_PAGE_SIZE 14 + * XXX: Add a BUILD_BUG_ON? 15 + */ 16 + 17 + #define xen_pfn_to_page(xen_pfn) \ 18 + ((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT))) 19 + #define page_to_xen_pfn(page) \ 20 + (((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT) 21 + 22 + #define XEN_PFN_PER_PAGE (PAGE_SIZE / XEN_PAGE_SIZE) 23 + 24 + #define XEN_PFN_DOWN(x) ((x) >> XEN_PAGE_SHIFT) 25 + #define XEN_PFN_UP(x) (((x) + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT) 26 + #define XEN_PFN_PHYS(x) ((phys_addr_t)(x) << XEN_PAGE_SHIFT) 27 + 4 28 #include <asm/xen/page.h> 5 29 30 + /* Return the GFN associated to the first 4KB of the page */ 6 31 static inline unsigned long xen_page_to_gfn(struct page *page) 7 32 { 8 - return pfn_to_gfn(page_to_pfn(page)); 33 + return pfn_to_gfn(page_to_xen_pfn(page)); 9 34 } 10 35 11 36 struct xen_memory_region {
+2 -2
include/xen/xenbus.h
··· 46 46 #include <xen/interface/io/xenbus.h> 47 47 #include <xen/interface/io/xs_wire.h> 48 48 49 - #define XENBUS_MAX_RING_PAGE_ORDER 4 50 - #define XENBUS_MAX_RING_PAGES (1U << XENBUS_MAX_RING_PAGE_ORDER) 49 + #define XENBUS_MAX_RING_GRANT_ORDER 4 50 + #define XENBUS_MAX_RING_GRANTS (1U << XENBUS_MAX_RING_GRANT_ORDER) 51 51 #define INVALID_GRANT_HANDLE (~0U) 52 52 53 53 /* Register callback to watch this node. */
+21 -8
mm/memory_hotplug.c
··· 1232 1232 } 1233 1233 1234 1234 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ 1235 - int __ref add_memory(int nid, u64 start, u64 size) 1235 + int __ref add_memory_resource(int nid, struct resource *res) 1236 1236 { 1237 + u64 start, size; 1237 1238 pg_data_t *pgdat = NULL; 1238 1239 bool new_pgdat; 1239 1240 bool new_node; 1240 - struct resource *res; 1241 1241 int ret; 1242 + 1243 + start = res->start; 1244 + size = resource_size(res); 1242 1245 1243 1246 ret = check_hotplug_memory_range(start, size); 1244 1247 if (ret) 1245 - return ret; 1246 - 1247 - res = register_memory_resource(start, size); 1248 - ret = -EEXIST; 1249 - if (!res) 1250 1248 return ret; 1251 1249 1252 1250 { /* Stupid hack to suppress address-never-null warning */ ··· 1298 1300 /* rollback pgdat allocation and others */ 1299 1301 if (new_pgdat) 1300 1302 rollback_node_hotadd(nid, pgdat); 1301 - release_memory_resource(res); 1302 1303 memblock_remove(start, size); 1303 1304 1304 1305 out: 1305 1306 mem_hotplug_done(); 1307 + return ret; 1308 + } 1309 + EXPORT_SYMBOL_GPL(add_memory_resource); 1310 + 1311 + int __ref add_memory(int nid, u64 start, u64 size) 1312 + { 1313 + struct resource *res; 1314 + int ret; 1315 + 1316 + res = register_memory_resource(start, size); 1317 + if (!res) 1318 + return -EEXIST; 1319 + 1320 + ret = add_memory_resource(nid, res); 1321 + if (ret < 0) 1322 + release_memory_resource(res); 1306 1323 return ret; 1307 1324 } 1308 1325 EXPORT_SYMBOL_GPL(add_memory);