Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'stable/for-linus-3.20-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen features and fixes from David Vrabel:

- Reworked handling for foreign (grant mapped) pages to simplify the
code, enable a number of additional use cases and fix a number of
long-standing bugs.

- Prefer the TSC over the Xen PV clock when running as dom0 (and the
TSC is stable).

- Assorted other cleanup and minor bug fixes.

* tag 'stable/for-linus-3.20-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (25 commits)
xen/manage: Fix USB interaction issues when resuming
xenbus: Add proper handling of XS_ERROR from Xenbus for transactions.
xen/gntdev: provide find_special_page VMA operation
xen/gntdev: mark userspace PTEs as special on x86 PV guests
xen-blkback: safely unmap grants in case they are still in use
xen/gntdev: safely unmap grants in case they are still in use
xen/gntdev: convert priv->lock to a mutex
xen/grant-table: add a mechanism to safely unmap pages that are in use
xen-netback: use foreign page information from the pages themselves
xen: mark grant mapped pages as foreign
xen/grant-table: add helpers for allocating pages
x86/xen: require ballooned pages for grant maps
xen: remove scratch frames for ballooned pages and m2p override
xen/grant-table: pre-populate kernel unmap ops for xen_gnttab_unmap_refs()
mm: add 'foreign' alias for the 'pinned' page flag
mm: provide a find_special_page vma operation
x86/xen: cleanup arch/x86/xen/mmu.c
x86/xen: add some __init annotations in arch/x86/xen/mmu.c
x86/xen: add some __init and static annotations in arch/x86/xen/setup.c
x86/xen: use correct types for addresses in arch/x86/xen/setup.c
...

+509 -594
+1 -1
arch/arm/include/asm/xen/page.h
··· 92 92 struct page **pages, unsigned int count); 93 93 94 94 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, 95 - struct gnttab_map_grant_ref *kmap_ops, 95 + struct gnttab_unmap_grant_ref *kunmap_ops, 96 96 struct page **pages, unsigned int count); 97 97 98 98 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+2 -2
arch/arm/xen/enlighten.c
··· 29 29 30 30 struct start_info _xen_start_info; 31 31 struct start_info *xen_start_info = &_xen_start_info; 32 - EXPORT_SYMBOL_GPL(xen_start_info); 32 + EXPORT_SYMBOL(xen_start_info); 33 33 34 34 enum xen_domain_type xen_domain_type = XEN_NATIVE; 35 - EXPORT_SYMBOL_GPL(xen_domain_type); 35 + EXPORT_SYMBOL(xen_domain_type); 36 36 37 37 struct shared_info xen_dummy_shared_info; 38 38 struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
+1 -1
arch/arm/xen/mm.c
··· 149 149 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); 150 150 151 151 struct dma_map_ops *xen_dma_ops; 152 - EXPORT_SYMBOL_GPL(xen_dma_ops); 152 + EXPORT_SYMBOL(xen_dma_ops); 153 153 154 154 static struct dma_map_ops xen_swiotlb_dma_ops = { 155 155 .mapping_error = xen_swiotlb_dma_mapping_error,
+1 -1
arch/arm/xen/p2m.c
··· 102 102 EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); 103 103 104 104 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, 105 - struct gnttab_map_grant_ref *kmap_ops, 105 + struct gnttab_unmap_grant_ref *kunmap_ops, 106 106 struct page **pages, unsigned int count) 107 107 { 108 108 int i;
+5 -15
arch/x86/include/asm/xen/page.h
··· 55 55 struct gnttab_map_grant_ref *kmap_ops, 56 56 struct page **pages, unsigned int count); 57 57 extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, 58 - struct gnttab_map_grant_ref *kmap_ops, 58 + struct gnttab_unmap_grant_ref *kunmap_ops, 59 59 struct page **pages, unsigned int count); 60 - extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 61 60 62 61 /* 63 62 * Helper functions to write or read unsigned long values to/from ··· 153 154 return mfn; 154 155 155 156 pfn = mfn_to_pfn_no_overrides(mfn); 156 - if (__pfn_to_mfn(pfn) != mfn) { 157 - /* 158 - * If this appears to be a foreign mfn (because the pfn 159 - * doesn't map back to the mfn), then check the local override 160 - * table to see if there's a better pfn to use. 161 - * 162 - * m2p_find_override_pfn returns ~0 if it doesn't find anything. 163 - */ 164 - pfn = m2p_find_override_pfn(mfn, ~0); 165 - } 157 + if (__pfn_to_mfn(pfn) != mfn) 158 + pfn = ~0; 166 159 167 160 /* 168 - * pfn is ~0 if there are no entries in the m2p for mfn or if the 169 - * entry doesn't map back to the mfn and m2p_override doesn't have a 170 - * valid entry for it. 161 + * pfn is ~0 if there are no entries in the m2p for mfn or the 162 + * entry doesn't map back to the mfn. 171 163 */ 172 164 if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) 173 165 pfn = mfn;
+8 -9
arch/x86/xen/mmu.c
··· 1489 1489 native_set_pte(ptep, pte); 1490 1490 } 1491 1491 1492 - static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1492 + static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1493 1493 { 1494 1494 struct mmuext_op op; 1495 1495 op.cmd = cmd; ··· 1657 1657 * Like __va(), but returns address in the kernel mapping (which is 1658 1658 * all we have until the physical memory mapping has been set up. 1659 1659 */ 1660 - static void *__ka(phys_addr_t paddr) 1660 + static void * __init __ka(phys_addr_t paddr) 1661 1661 { 1662 1662 #ifdef CONFIG_X86_64 1663 1663 return (void *)(paddr + __START_KERNEL_map); ··· 1667 1667 } 1668 1668 1669 1669 /* Convert a machine address to physical address */ 1670 - static unsigned long m2p(phys_addr_t maddr) 1670 + static unsigned long __init m2p(phys_addr_t maddr) 1671 1671 { 1672 1672 phys_addr_t paddr; 1673 1673 ··· 1678 1678 } 1679 1679 1680 1680 /* Convert a machine address to kernel virtual */ 1681 - static void *m2v(phys_addr_t maddr) 1681 + static void * __init m2v(phys_addr_t maddr) 1682 1682 { 1683 1683 return __ka(m2p(maddr)); 1684 1684 } 1685 1685 1686 1686 /* Set the page permissions on an identity-mapped pages */ 1687 - static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) 1687 + static void __init set_page_prot_flags(void *addr, pgprot_t prot, 1688 + unsigned long flags) 1688 1689 { 1689 1690 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1690 1691 pte_t pte = pfn_pte(pfn, prot); ··· 1697 1696 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) 1698 1697 BUG(); 1699 1698 } 1700 - static void set_page_prot(void *addr, pgprot_t prot) 1699 + static void __init set_page_prot(void *addr, pgprot_t prot) 1701 1700 { 1702 1701 return set_page_prot_flags(addr, prot, UVMF_NONE); 1703 1702 } ··· 1734 1733 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { 1735 1734 pte_t pte; 1736 1735 1737 - #ifdef CONFIG_X86_32 1738 1736 if (pfn > max_pfn_mapped) 1739 
1737 max_pfn_mapped = pfn; 1740 - #endif 1741 1738 1742 1739 if (!pte_none(pte_page[pteidx])) 1743 1740 continue; ··· 1768 1769 } 1769 1770 1770 1771 #ifdef CONFIG_X86_64 1771 - static void convert_pfn_mfn(void *v) 1772 + static void __init convert_pfn_mfn(void *v) 1772 1773 { 1773 1774 pte_t *pte = v; 1774 1775 int i;
+11 -256
arch/x86/xen/p2m.c
··· 84 84 85 85 #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) 86 86 87 - static void __init m2p_override_init(void); 88 - 89 87 unsigned long *xen_p2m_addr __read_mostly; 90 88 EXPORT_SYMBOL_GPL(xen_p2m_addr); 91 89 unsigned long xen_p2m_size __read_mostly; ··· 400 402 xen_p2m_size = xen_max_p2m_pfn; 401 403 402 404 xen_inv_extra_mem(); 403 - 404 - m2p_override_init(); 405 405 } 406 406 407 407 unsigned long get_phys_to_machine(unsigned long pfn) ··· 648 652 return true; 649 653 } 650 654 651 - #define M2P_OVERRIDE_HASH_SHIFT 10 652 - #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) 653 - 654 - static struct list_head *m2p_overrides; 655 - static DEFINE_SPINLOCK(m2p_override_lock); 656 - 657 - static void __init m2p_override_init(void) 658 - { 659 - unsigned i; 660 - 661 - m2p_overrides = alloc_bootmem_align( 662 - sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, 663 - sizeof(unsigned long)); 664 - 665 - for (i = 0; i < M2P_OVERRIDE_HASH; i++) 666 - INIT_LIST_HEAD(&m2p_overrides[i]); 667 - } 668 - 669 - static unsigned long mfn_hash(unsigned long mfn) 670 - { 671 - return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); 672 - } 673 - 674 - /* Add an MFN override for a particular page */ 675 - static int m2p_add_override(unsigned long mfn, struct page *page, 676 - struct gnttab_map_grant_ref *kmap_op) 677 - { 678 - unsigned long flags; 679 - unsigned long pfn; 680 - unsigned long uninitialized_var(address); 681 - unsigned level; 682 - pte_t *ptep = NULL; 683 - 684 - pfn = page_to_pfn(page); 685 - if (!PageHighMem(page)) { 686 - address = (unsigned long)__va(pfn << PAGE_SHIFT); 687 - ptep = lookup_address(address, &level); 688 - if (WARN(ptep == NULL || level != PG_LEVEL_4K, 689 - "m2p_add_override: pfn %lx not mapped", pfn)) 690 - return -EINVAL; 691 - } 692 - 693 - if (kmap_op != NULL) { 694 - if (!PageHighMem(page)) { 695 - struct multicall_space mcs = 696 - xen_mc_entry(sizeof(*kmap_op)); 697 - 698 - MULTI_grant_table_op(mcs.mc, 699 - 
GNTTABOP_map_grant_ref, kmap_op, 1); 700 - 701 - xen_mc_issue(PARAVIRT_LAZY_MMU); 702 - } 703 - } 704 - spin_lock_irqsave(&m2p_override_lock, flags); 705 - list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); 706 - spin_unlock_irqrestore(&m2p_override_lock, flags); 707 - 708 - /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in 709 - * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other 710 - * pfn so that the following mfn_to_pfn(mfn) calls will return the 711 - * pfn from the m2p_override (the backend pfn) instead. 712 - * We need to do this because the pages shared by the frontend 713 - * (xen-blkfront) can be already locked (lock_page, called by 714 - * do_read_cache_page); when the userspace backend tries to use them 715 - * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so 716 - * do_blockdev_direct_IO is going to try to lock the same pages 717 - * again resulting in a deadlock. 718 - * As a side effect get_user_pages_fast might not be safe on the 719 - * frontend pages while they are being shared with the backend, 720 - * because mfn_to_pfn (that ends up being called by GUPF) will 721 - * return the backend pfn rather than the frontend pfn. 
*/ 722 - pfn = mfn_to_pfn_no_overrides(mfn); 723 - if (__pfn_to_mfn(pfn) == mfn) 724 - set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); 725 - 726 - return 0; 727 - } 728 - 729 655 int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, 730 656 struct gnttab_map_grant_ref *kmap_ops, 731 657 struct page **pages, unsigned int count) 732 658 { 733 659 int i, ret = 0; 734 - bool lazy = false; 735 660 pte_t *pte; 736 661 737 662 if (xen_feature(XENFEAT_auto_translated_physmap)) 738 663 return 0; 739 664 740 - if (kmap_ops && 741 - !in_interrupt() && 742 - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { 743 - arch_enter_lazy_mmu_mode(); 744 - lazy = true; 665 + if (kmap_ops) { 666 + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 667 + kmap_ops, count); 668 + if (ret) 669 + goto out; 745 670 } 746 671 747 672 for (i = 0; i < count; i++) { ··· 681 764 } 682 765 pfn = page_to_pfn(pages[i]); 683 766 684 - WARN_ON(PagePrivate(pages[i])); 685 - SetPagePrivate(pages[i]); 686 - set_page_private(pages[i], mfn); 687 - pages[i]->index = pfn_to_mfn(pfn); 767 + WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); 688 768 689 769 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { 690 770 ret = -ENOMEM; 691 771 goto out; 692 772 } 693 - 694 - if (kmap_ops) { 695 - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); 696 - if (ret) 697 - goto out; 698 - } 699 773 } 700 774 701 775 out: 702 - if (lazy) 703 - arch_leave_lazy_mmu_mode(); 704 - 705 776 return ret; 706 777 } 707 778 EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); 708 779 709 - static struct page *m2p_find_override(unsigned long mfn) 710 - { 711 - unsigned long flags; 712 - struct list_head *bucket; 713 - struct page *p, *ret; 714 - 715 - if (unlikely(!m2p_overrides)) 716 - return NULL; 717 - 718 - ret = NULL; 719 - bucket = &m2p_overrides[mfn_hash(mfn)]; 720 - 721 - spin_lock_irqsave(&m2p_override_lock, flags); 722 - 723 - list_for_each_entry(p, bucket, lru) { 724 - if 
(page_private(p) == mfn) { 725 - ret = p; 726 - break; 727 - } 728 - } 729 - 730 - spin_unlock_irqrestore(&m2p_override_lock, flags); 731 - 732 - return ret; 733 - } 734 - 735 - static int m2p_remove_override(struct page *page, 736 - struct gnttab_map_grant_ref *kmap_op, 737 - unsigned long mfn) 738 - { 739 - unsigned long flags; 740 - unsigned long pfn; 741 - unsigned long uninitialized_var(address); 742 - unsigned level; 743 - pte_t *ptep = NULL; 744 - 745 - pfn = page_to_pfn(page); 746 - 747 - if (!PageHighMem(page)) { 748 - address = (unsigned long)__va(pfn << PAGE_SHIFT); 749 - ptep = lookup_address(address, &level); 750 - 751 - if (WARN(ptep == NULL || level != PG_LEVEL_4K, 752 - "m2p_remove_override: pfn %lx not mapped", pfn)) 753 - return -EINVAL; 754 - } 755 - 756 - spin_lock_irqsave(&m2p_override_lock, flags); 757 - list_del(&page->lru); 758 - spin_unlock_irqrestore(&m2p_override_lock, flags); 759 - 760 - if (kmap_op != NULL) { 761 - if (!PageHighMem(page)) { 762 - struct multicall_space mcs; 763 - struct gnttab_unmap_and_replace *unmap_op; 764 - struct page *scratch_page = get_balloon_scratch_page(); 765 - unsigned long scratch_page_address = (unsigned long) 766 - __va(page_to_pfn(scratch_page) << PAGE_SHIFT); 767 - 768 - /* 769 - * It might be that we queued all the m2p grant table 770 - * hypercalls in a multicall, then m2p_remove_override 771 - * get called before the multicall has actually been 772 - * issued. In this case handle is going to -1 because 773 - * it hasn't been modified yet. 774 - */ 775 - if (kmap_op->handle == -1) 776 - xen_mc_flush(); 777 - /* 778 - * Now if kmap_op->handle is negative it means that the 779 - * hypercall actually returned an error. 
780 - */ 781 - if (kmap_op->handle == GNTST_general_error) { 782 - pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", 783 - pfn, mfn); 784 - put_balloon_scratch_page(); 785 - return -1; 786 - } 787 - 788 - xen_mc_batch(); 789 - 790 - mcs = __xen_mc_entry( 791 - sizeof(struct gnttab_unmap_and_replace)); 792 - unmap_op = mcs.args; 793 - unmap_op->host_addr = kmap_op->host_addr; 794 - unmap_op->new_addr = scratch_page_address; 795 - unmap_op->handle = kmap_op->handle; 796 - 797 - MULTI_grant_table_op(mcs.mc, 798 - GNTTABOP_unmap_and_replace, unmap_op, 1); 799 - 800 - mcs = __xen_mc_entry(0); 801 - MULTI_update_va_mapping(mcs.mc, scratch_page_address, 802 - pfn_pte(page_to_pfn(scratch_page), 803 - PAGE_KERNEL_RO), 0); 804 - 805 - xen_mc_issue(PARAVIRT_LAZY_MMU); 806 - 807 - kmap_op->host_addr = 0; 808 - put_balloon_scratch_page(); 809 - } 810 - } 811 - 812 - /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present 813 - * somewhere in this domain, even before being added to the 814 - * m2p_override (see comment above in m2p_add_override). 815 - * If there are no other entries in the m2p_override corresponding 816 - * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for 817 - * the original pfn (the one shared by the frontend): the backend 818 - * cannot do any IO on this page anymore because it has been 819 - * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of 820 - * the original pfn causes mfn_to_pfn(mfn) to return the frontend 821 - * pfn again. 
*/ 822 - mfn &= ~FOREIGN_FRAME_BIT; 823 - pfn = mfn_to_pfn_no_overrides(mfn); 824 - if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && 825 - m2p_find_override(mfn) == NULL) 826 - set_phys_to_machine(pfn, mfn); 827 - 828 - return 0; 829 - } 830 - 831 780 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, 832 - struct gnttab_map_grant_ref *kmap_ops, 781 + struct gnttab_unmap_grant_ref *kunmap_ops, 833 782 struct page **pages, unsigned int count) 834 783 { 835 784 int i, ret = 0; 836 - bool lazy = false; 837 785 838 786 if (xen_feature(XENFEAT_auto_translated_physmap)) 839 787 return 0; 840 - 841 - if (kmap_ops && 842 - !in_interrupt() && 843 - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { 844 - arch_enter_lazy_mmu_mode(); 845 - lazy = true; 846 - } 847 788 848 789 for (i = 0; i < count; i++) { 849 790 unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); ··· 712 937 goto out; 713 938 } 714 939 715 - set_page_private(pages[i], INVALID_P2M_ENTRY); 716 - WARN_ON(!PagePrivate(pages[i])); 717 - ClearPagePrivate(pages[i]); 718 - set_phys_to_machine(pfn, pages[i]->index); 719 - 720 - if (kmap_ops) 721 - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); 722 - if (ret) 723 - goto out; 940 + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 724 941 } 725 - 942 + if (kunmap_ops) 943 + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 944 + kunmap_ops, count); 726 945 out: 727 - if (lazy) 728 - arch_leave_lazy_mmu_mode(); 729 946 return ret; 730 947 } 731 948 EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); 732 - 733 - unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) 734 - { 735 - struct page *p = m2p_find_override(mfn); 736 - unsigned long ret = pfn; 737 - 738 - if (p) 739 - ret = page_to_pfn(p); 740 - 741 - return ret; 742 - } 743 - EXPORT_SYMBOL_GPL(m2p_find_override_pfn); 744 949 745 950 #ifdef CONFIG_XEN_DEBUG_FS 746 951 #include <linux/debugfs.h>
+14 -23
arch/x86/xen/setup.c
··· 32 32 #include "p2m.h" 33 33 #include "mmu.h" 34 34 35 - /* These are code, but not functions. Defined in entry.S */ 36 - extern const char xen_hypervisor_callback[]; 37 - extern const char xen_failsafe_callback[]; 38 - #ifdef CONFIG_X86_64 39 - extern asmlinkage void nmi(void); 40 - #endif 41 - extern void xen_sysenter_target(void); 42 - extern void xen_syscall_target(void); 43 - extern void xen_syscall32_target(void); 44 - 45 35 /* Amount of extra memory space we add to the e820 ranges */ 46 36 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; 47 37 ··· 64 74 */ 65 75 #define EXTRA_MEM_RATIO (10) 66 76 67 - static void __init xen_add_extra_mem(u64 start, u64 size) 77 + static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size) 68 78 { 69 79 int i; 70 80 ··· 87 97 memblock_reserve(start, size); 88 98 } 89 99 90 - static void __init xen_del_extra_mem(u64 start, u64 size) 100 + static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size) 91 101 { 92 102 int i; 93 - u64 start_r, size_r; 103 + phys_addr_t start_r, size_r; 94 104 95 105 for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { 96 106 start_r = xen_extra_mem[i].start; ··· 257 267 static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) 258 268 { 259 269 struct mmu_update update = { 260 - .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, 270 + .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, 261 271 .val = pfn 262 272 }; 263 273 ··· 535 545 return min(max_pages, MAX_DOMAIN_PAGES); 536 546 } 537 547 538 - static void xen_align_and_add_e820_region(u64 start, u64 size, int type) 548 + static void __init xen_align_and_add_e820_region(phys_addr_t start, 549 + phys_addr_t size, int type) 539 550 { 540 - u64 end = start + size; 551 + phys_addr_t end = start + size; 541 552 542 553 /* Align RAM regions to page boundaries. 
*/ 543 554 if (type == E820_RAM) { 544 555 start = PAGE_ALIGN(start); 545 - end &= ~((u64)PAGE_SIZE - 1); 556 + end &= ~((phys_addr_t)PAGE_SIZE - 1); 546 557 } 547 558 548 559 e820_add_region(start, end - start, type); 549 560 } 550 561 551 - void xen_ignore_unusable(struct e820entry *list, size_t map_size) 562 + static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size) 552 563 { 553 564 struct e820entry *entry; 554 565 unsigned int i; ··· 568 577 static struct e820entry map[E820MAX] __initdata; 569 578 570 579 unsigned long max_pfn = xen_start_info->nr_pages; 571 - unsigned long long mem_end; 580 + phys_addr_t mem_end; 572 581 int rc; 573 582 struct xen_memory_map memmap; 574 583 unsigned long max_pages; ··· 643 652 extra_pages); 644 653 i = 0; 645 654 while (i < memmap.nr_entries) { 646 - u64 addr = map[i].addr; 647 - u64 size = map[i].size; 655 + phys_addr_t addr = map[i].addr; 656 + phys_addr_t size = map[i].size; 648 657 u32 type = map[i].type; 649 658 650 659 if (type == E820_RAM) { 651 660 if (addr < mem_end) { 652 661 size = min(size, mem_end - addr); 653 662 } else if (extra_pages) { 654 - size = min(size, (u64)extra_pages * PAGE_SIZE); 655 - extra_pages -= size / PAGE_SIZE; 663 + size = min(size, PFN_PHYS(extra_pages)); 664 + extra_pages -= PFN_DOWN(size); 656 665 xen_add_extra_mem(addr, size); 657 666 xen_max_p2m_pfn = PFN_DOWN(addr + size); 658 667 } else
+1 -1
arch/x86/xen/smp.c
··· 507 507 static void xen_cpu_die(unsigned int cpu) 508 508 { 509 509 while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { 510 - current->state = TASK_UNINTERRUPTIBLE; 510 + __set_current_state(TASK_UNINTERRUPTIBLE); 511 511 schedule_timeout(HZ/10); 512 512 } 513 513
+4
arch/x86/xen/time.c
··· 479 479 int cpu = smp_processor_id(); 480 480 struct timespec tp; 481 481 482 + /* As Dom0 is never moved, no penalty on using TSC there */ 483 + if (xen_initial_domain()) 484 + xen_clocksource.rating = 275; 485 + 482 486 clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); 483 487 484 488 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
+6
arch/x86/xen/xen-ops.h
··· 10 10 extern const char xen_hypervisor_callback[]; 11 11 extern const char xen_failsafe_callback[]; 12 12 13 + void xen_sysenter_target(void); 14 + #ifdef CONFIG_X86_64 15 + void xen_syscall_target(void); 16 + void xen_syscall32_target(void); 17 + #endif 18 + 13 19 extern void *xen_initial_gdt; 14 20 15 21 struct trap_info;
+123 -54
drivers/block/xen-blkback/blkback.c
··· 47 47 #include <asm/xen/hypervisor.h> 48 48 #include <asm/xen/hypercall.h> 49 49 #include <xen/balloon.h> 50 + #include <xen/grant_table.h> 50 51 #include "common.h" 51 52 52 53 /* ··· 101 100 102 101 #define BLKBACK_INVALID_HANDLE (~0) 103 102 104 - /* Number of free pages to remove on each call to free_xenballooned_pages */ 103 + /* Number of free pages to remove on each call to gnttab_free_pages */ 105 104 #define NUM_BATCH_FREE_PAGES 10 106 105 107 106 static inline int get_free_page(struct xen_blkif *blkif, struct page **page) ··· 112 111 if (list_empty(&blkif->free_pages)) { 113 112 BUG_ON(blkif->free_pages_num != 0); 114 113 spin_unlock_irqrestore(&blkif->free_pages_lock, flags); 115 - return alloc_xenballooned_pages(1, page, false); 114 + return gnttab_alloc_pages(1, page); 116 115 } 117 116 BUG_ON(blkif->free_pages_num == 0); 118 117 page[0] = list_first_entry(&blkif->free_pages, struct page, lru); ··· 152 151 blkif->free_pages_num--; 153 152 if (++num_pages == NUM_BATCH_FREE_PAGES) { 154 153 spin_unlock_irqrestore(&blkif->free_pages_lock, flags); 155 - free_xenballooned_pages(num_pages, page); 154 + gnttab_free_pages(num_pages, page); 156 155 spin_lock_irqsave(&blkif->free_pages_lock, flags); 157 156 num_pages = 0; 158 157 } 159 158 } 160 159 spin_unlock_irqrestore(&blkif->free_pages_lock, flags); 161 160 if (num_pages != 0) 162 - free_xenballooned_pages(num_pages, page); 161 + gnttab_free_pages(num_pages, page); 163 162 } 164 163 165 164 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) ··· 263 262 atomic_dec(&blkif->persistent_gnt_in_use); 264 263 } 265 264 265 + static void free_persistent_gnts_unmap_callback(int result, 266 + struct gntab_unmap_queue_data *data) 267 + { 268 + struct completion *c = data->data; 269 + 270 + /* BUG_ON used to reproduce existing behaviour, 271 + but is this the best way to deal with this? 
*/ 272 + BUG_ON(result); 273 + complete(c); 274 + } 275 + 266 276 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, 267 277 unsigned int num) 268 278 { ··· 281 269 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 282 270 struct persistent_gnt *persistent_gnt; 283 271 struct rb_node *n; 284 - int ret = 0; 285 272 int segs_to_unmap = 0; 273 + struct gntab_unmap_queue_data unmap_data; 274 + struct completion unmap_completion; 275 + 276 + init_completion(&unmap_completion); 277 + 278 + unmap_data.data = &unmap_completion; 279 + unmap_data.done = &free_persistent_gnts_unmap_callback; 280 + unmap_data.pages = pages; 281 + unmap_data.unmap_ops = unmap; 282 + unmap_data.kunmap_ops = NULL; 286 283 287 284 foreach_grant_safe(persistent_gnt, n, root, node) { 288 285 BUG_ON(persistent_gnt->handle == ··· 306 285 307 286 if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || 308 287 !rb_next(&persistent_gnt->node)) { 309 - ret = gnttab_unmap_refs(unmap, NULL, pages, 310 - segs_to_unmap); 311 - BUG_ON(ret); 288 + 289 + unmap_data.count = segs_to_unmap; 290 + gnttab_unmap_refs_async(&unmap_data); 291 + wait_for_completion(&unmap_completion); 292 + 312 293 put_free_pages(blkif, pages, segs_to_unmap); 313 294 segs_to_unmap = 0; 314 295 } ··· 676 653 shrink_free_pagepool(blkif, 0 /* All */); 677 654 } 678 655 679 - /* 680 - * Unmap the grant references, and also remove the M2P over-rides 681 - * used in the 'pending_req'. 
682 - */ 683 - static void xen_blkbk_unmap(struct xen_blkif *blkif, 684 - struct grant_page *pages[], 685 - int num) 656 + static unsigned int xen_blkbk_unmap_prepare( 657 + struct xen_blkif *blkif, 658 + struct grant_page **pages, 659 + unsigned int num, 660 + struct gnttab_unmap_grant_ref *unmap_ops, 661 + struct page **unmap_pages) 686 662 { 687 - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 688 - struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 689 663 unsigned int i, invcount = 0; 690 - int ret; 691 664 692 665 for (i = 0; i < num; i++) { 693 666 if (pages[i]->persistent_gnt != NULL) { ··· 693 674 if (pages[i]->handle == BLKBACK_INVALID_HANDLE) 694 675 continue; 695 676 unmap_pages[invcount] = pages[i]->page; 696 - gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), 677 + gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[i]->page), 697 678 GNTMAP_host_map, pages[i]->handle); 698 679 pages[i]->handle = BLKBACK_INVALID_HANDLE; 699 - if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { 700 - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, 701 - invcount); 680 + invcount++; 681 + } 682 + 683 + return invcount; 684 + } 685 + 686 + static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) 687 + { 688 + struct pending_req* pending_req = (struct pending_req*) (data->data); 689 + struct xen_blkif *blkif = pending_req->blkif; 690 + 691 + /* BUG_ON used to reproduce existing behaviour, 692 + but is this the best way to deal with this? */ 693 + BUG_ON(result); 694 + 695 + put_free_pages(blkif, data->pages, data->count); 696 + make_response(blkif, pending_req->id, 697 + pending_req->operation, pending_req->status); 698 + free_req(blkif, pending_req); 699 + /* 700 + * Make sure the request is freed before releasing blkif, 701 + * or there could be a race between free_req and the 702 + * cleanup done in xen_blkif_free during shutdown. 
703 + * 704 + * NB: The fact that we might try to wake up pending_free_wq 705 + * before drain_complete (in case there's a drain going on) 706 + * it's not a problem with our current implementation 707 + * because we can assure there's no thread waiting on 708 + * pending_free_wq if there's a drain going on, but it has 709 + * to be taken into account if the current model is changed. 710 + */ 711 + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { 712 + complete(&blkif->drain_complete); 713 + } 714 + xen_blkif_put(blkif); 715 + } 716 + 717 + static void xen_blkbk_unmap_and_respond(struct pending_req *req) 718 + { 719 + struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data; 720 + struct xen_blkif *blkif = req->blkif; 721 + struct grant_page **pages = req->segments; 722 + unsigned int invcount; 723 + 724 + invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages, 725 + req->unmap, req->unmap_pages); 726 + 727 + work->data = req; 728 + work->done = xen_blkbk_unmap_and_respond_callback; 729 + work->unmap_ops = req->unmap; 730 + work->kunmap_ops = NULL; 731 + work->pages = req->unmap_pages; 732 + work->count = invcount; 733 + 734 + gnttab_unmap_refs_async(&req->gnttab_unmap_data); 735 + } 736 + 737 + 738 + /* 739 + * Unmap the grant references. 740 + * 741 + * This could accumulate ops up to the batch size to reduce the number 742 + * of hypercalls, but since this is only used in error paths there's 743 + * no real need. 
744 + */ 745 + static void xen_blkbk_unmap(struct xen_blkif *blkif, 746 + struct grant_page *pages[], 747 + int num) 748 + { 749 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 750 + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 751 + unsigned int invcount = 0; 752 + int ret; 753 + 754 + while (num) { 755 + unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST); 756 + 757 + invcount = xen_blkbk_unmap_prepare(blkif, pages, batch, 758 + unmap, unmap_pages); 759 + if (invcount) { 760 + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); 702 761 BUG_ON(ret); 703 762 put_free_pages(blkif, unmap_pages, invcount); 704 - invcount = 0; 705 763 } 706 - } 707 - if (invcount) { 708 - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); 709 - BUG_ON(ret); 710 - put_free_pages(blkif, unmap_pages, invcount); 764 + pages += batch; 765 + num -= batch; 711 766 } 712 767 } 713 768 ··· 1075 982 * the grant references associated with 'request' and provide 1076 983 * the proper response on the ring. 1077 984 */ 1078 - if (atomic_dec_and_test(&pending_req->pendcnt)) { 1079 - struct xen_blkif *blkif = pending_req->blkif; 1080 - 1081 - xen_blkbk_unmap(blkif, 1082 - pending_req->segments, 1083 - pending_req->nr_pages); 1084 - make_response(blkif, pending_req->id, 1085 - pending_req->operation, pending_req->status); 1086 - free_req(blkif, pending_req); 1087 - /* 1088 - * Make sure the request is freed before releasing blkif, 1089 - * or there could be a race between free_req and the 1090 - * cleanup done in xen_blkif_free during shutdown. 1091 - * 1092 - * NB: The fact that we might try to wake up pending_free_wq 1093 - * before drain_complete (in case there's a drain going on) 1094 - * it's not a problem with our current implementation 1095 - * because we can assure there's no thread waiting on 1096 - * pending_free_wq if there's a drain going on, but it has 1097 - * to be taken into account if the current model is changed. 
1098 - */ 1099 - if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { 1100 - complete(&blkif->drain_complete); 1101 - } 1102 - xen_blkif_put(blkif); 1103 - } 985 + if (atomic_dec_and_test(&pending_req->pendcnt)) 986 + xen_blkbk_unmap_and_respond(pending_req); 1104 987 } 1105 988 1106 989 /*
+3
drivers/block/xen-blkback/common.h
··· 350 350 struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; 351 351 struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; 352 352 struct bio *biolist[MAX_INDIRECT_SEGMENTS]; 353 + struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS]; 354 + struct page *unmap_pages[MAX_INDIRECT_SEGMENTS]; 355 + struct gntab_unmap_queue_data gnttab_unmap_data; 353 356 }; 354 357 355 358
+3 -4
drivers/net/xen-netback/interface.c
··· 483 483 * better enable it. The long term solution would be to use just a 484 484 * bunch of valid page descriptors, without dependency on ballooning 485 485 */ 486 - err = alloc_xenballooned_pages(MAX_PENDING_REQS, 487 - queue->mmap_pages, 488 - false); 486 + err = gnttab_alloc_pages(MAX_PENDING_REQS, 487 + queue->mmap_pages); 489 488 if (err) { 490 489 netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n"); 491 490 return -ENOMEM; ··· 663 664 */ 664 665 void xenvif_deinit_queue(struct xenvif_queue *queue) 665 666 { 666 - free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); 667 + gnttab_free_pages(MAX_PENDING_REQS, queue->mmap_pages); 667 668 } 668 669 669 670 void xenvif_free(struct xenvif *vif)
+9 -97
drivers/net/xen-netback/netback.c
··· 314 314 static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb, 315 315 struct netrx_pending_operations *npo, 316 316 struct page *page, unsigned long size, 317 - unsigned long offset, int *head, 318 - struct xenvif_queue *foreign_queue, 319 - grant_ref_t foreign_gref) 317 + unsigned long offset, int *head) 320 318 { 321 319 struct gnttab_copy *copy_gop; 322 320 struct xenvif_rx_meta *meta; ··· 331 333 offset &= ~PAGE_MASK; 332 334 333 335 while (size > 0) { 336 + struct xen_page_foreign *foreign; 337 + 334 338 BUG_ON(offset >= PAGE_SIZE); 335 339 BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); 336 340 ··· 361 361 copy_gop->flags = GNTCOPY_dest_gref; 362 362 copy_gop->len = bytes; 363 363 364 - if (foreign_queue) { 365 - copy_gop->source.domid = foreign_queue->vif->domid; 366 - copy_gop->source.u.ref = foreign_gref; 364 + foreign = xen_page_foreign(page); 365 + if (foreign) { 366 + copy_gop->source.domid = foreign->domid; 367 + copy_gop->source.u.ref = foreign->gref; 367 368 copy_gop->flags |= GNTCOPY_source_gref; 368 369 } else { 369 370 copy_gop->source.domid = DOMID_SELF; ··· 407 406 } 408 407 409 408 /* 410 - * Find the grant ref for a given frag in a chain of struct ubuf_info's 411 - * skb: the skb itself 412 - * i: the frag's number 413 - * ubuf: a pointer to an element in the chain. It should not be NULL 414 - * 415 - * Returns a pointer to the element in the chain where the page were found. If 416 - * not found, returns NULL. 417 - * See the definition of callback_struct in common.h for more details about 418 - * the chain. 
419 - */ 420 - static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb, 421 - const int i, 422 - const struct ubuf_info *ubuf) 423 - { 424 - struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf); 425 - 426 - do { 427 - u16 pending_idx = ubuf->desc; 428 - 429 - if (skb_shinfo(skb)->frags[i].page.p == 430 - foreign_queue->mmap_pages[pending_idx]) 431 - break; 432 - ubuf = (struct ubuf_info *) ubuf->ctx; 433 - } while (ubuf); 434 - 435 - return ubuf; 436 - } 437 - 438 - /* 439 409 * Prepare an SKB to be transmitted to the frontend. 440 410 * 441 411 * This function is responsible for allocating grant operations, meta ··· 431 459 int head = 1; 432 460 int old_meta_prod; 433 461 int gso_type; 434 - const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; 435 - const struct ubuf_info *const head_ubuf = ubuf; 436 462 437 463 old_meta_prod = npo->meta_prod; 438 464 ··· 477 507 len = skb_tail_pointer(skb) - data; 478 508 479 509 xenvif_gop_frag_copy(queue, skb, npo, 480 - virt_to_page(data), len, offset, &head, 481 - NULL, 482 - 0); 510 + virt_to_page(data), len, offset, &head); 483 511 data += len; 484 512 } 485 513 486 514 for (i = 0; i < nr_frags; i++) { 487 - /* This variable also signals whether foreign_gref has a real 488 - * value or not. 489 - */ 490 - struct xenvif_queue *foreign_queue = NULL; 491 - grant_ref_t foreign_gref; 492 - 493 - if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && 494 - (ubuf->callback == &xenvif_zerocopy_callback)) { 495 - const struct ubuf_info *const startpoint = ubuf; 496 - 497 - /* Ideally ubuf points to the chain element which 498 - * belongs to this frag. Or if frags were removed from 499 - * the beginning, then shortly before it. 500 - */ 501 - ubuf = xenvif_find_gref(skb, i, ubuf); 502 - 503 - /* Try again from the beginning of the list, if we 504 - * haven't tried from there. This only makes sense in 505 - * the unlikely event of reordering the original frags. 
506 - * For injected local pages it's an unnecessary second 507 - * run. 508 - */ 509 - if (unlikely(!ubuf) && startpoint != head_ubuf) 510 - ubuf = xenvif_find_gref(skb, i, head_ubuf); 511 - 512 - if (likely(ubuf)) { 513 - u16 pending_idx = ubuf->desc; 514 - 515 - foreign_queue = ubuf_to_queue(ubuf); 516 - foreign_gref = 517 - foreign_queue->pending_tx_info[pending_idx].req.gref; 518 - /* Just a safety measure. If this was the last 519 - * element on the list, the for loop will 520 - * iterate again if a local page were added to 521 - * the end. Using head_ubuf here prevents the 522 - * second search on the chain. Or the original 523 - * frags changed order, but that's less likely. 524 - * In any way, ubuf shouldn't be NULL. 525 - */ 526 - ubuf = ubuf->ctx ? 527 - (struct ubuf_info *) ubuf->ctx : 528 - head_ubuf; 529 - } else 530 - /* This frag was a local page, added to the 531 - * array after the skb left netback. 532 - */ 533 - ubuf = head_ubuf; 534 - } 535 515 xenvif_gop_frag_copy(queue, skb, npo, 536 516 skb_frag_page(&skb_shinfo(skb)->frags[i]), 537 517 skb_frag_size(&skb_shinfo(skb)->frags[i]), 538 518 skb_shinfo(skb)->frags[i].page_offset, 539 - &head, 540 - foreign_queue, 541 - foreign_queue ? foreign_gref : UINT_MAX); 519 + &head); 542 520 } 543 521 544 522 return npo->meta_prod - old_meta_prod; ··· 1159 1241 /* Take an extra reference to offset network stack's put_page */ 1160 1242 get_page(queue->mmap_pages[pending_idx]); 1161 1243 } 1162 - /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc 1163 - * overlaps with "index", and "mapping" is not set. I think mapping 1164 - * should be set. If delivered to local stack, it would drop this 1165 - * skb in sk_filter unless the socket has the right to use it. 1166 - */ 1167 - skb->pfmemalloc = false; 1168 1244 } 1169 1245 1170 1246 static int xenvif_get_extras(struct xenvif_queue *queue,
+2 -84
drivers/xen/balloon.c
··· 92 92 93 93 /* We increase/decrease in batches which fit in a page */ 94 94 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; 95 - static DEFINE_PER_CPU(struct page *, balloon_scratch_page); 96 95 97 96 98 97 /* List of ballooned pages, threaded through the mem_map array. */ ··· 422 423 page = pfn_to_page(pfn); 423 424 424 425 #ifdef CONFIG_XEN_HAVE_PVMMU 425 - /* 426 - * Ballooned out frames are effectively replaced with 427 - * a scratch frame. Ensure direct mappings and the 428 - * p2m are consistent. 429 - */ 430 426 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 431 427 if (!PageHighMem(page)) { 432 - struct page *scratch_page = get_balloon_scratch_page(); 433 - 434 428 ret = HYPERVISOR_update_va_mapping( 435 429 (unsigned long)__va(pfn << PAGE_SHIFT), 436 - pfn_pte(page_to_pfn(scratch_page), 437 - PAGE_KERNEL_RO), 0); 430 + __pte_ma(0), 0); 438 431 BUG_ON(ret); 439 - 440 - put_balloon_scratch_page(); 441 432 } 442 433 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 443 434 } ··· 487 498 schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); 488 499 489 500 mutex_unlock(&balloon_mutex); 490 - } 491 - 492 - struct page *get_balloon_scratch_page(void) 493 - { 494 - struct page *ret = get_cpu_var(balloon_scratch_page); 495 - BUG_ON(ret == NULL); 496 - return ret; 497 - } 498 - 499 - void put_balloon_scratch_page(void) 500 - { 501 - put_cpu_var(balloon_scratch_page); 502 501 } 503 502 504 503 /* Resets the Xen limit, sets new target, and kicks off processing. 
*/ ··· 582 605 } 583 606 } 584 607 585 - static int alloc_balloon_scratch_page(int cpu) 586 - { 587 - if (per_cpu(balloon_scratch_page, cpu) != NULL) 588 - return 0; 589 - 590 - per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); 591 - if (per_cpu(balloon_scratch_page, cpu) == NULL) { 592 - pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); 593 - return -ENOMEM; 594 - } 595 - 596 - return 0; 597 - } 598 - 599 - 600 - static int balloon_cpu_notify(struct notifier_block *self, 601 - unsigned long action, void *hcpu) 602 - { 603 - int cpu = (long)hcpu; 604 - switch (action) { 605 - case CPU_UP_PREPARE: 606 - if (alloc_balloon_scratch_page(cpu)) 607 - return NOTIFY_BAD; 608 - break; 609 - default: 610 - break; 611 - } 612 - return NOTIFY_OK; 613 - } 614 - 615 - static struct notifier_block balloon_cpu_notifier = { 616 - .notifier_call = balloon_cpu_notify, 617 - }; 618 - 619 608 static int __init balloon_init(void) 620 609 { 621 - int i, cpu; 610 + int i; 622 611 623 612 if (!xen_domain()) 624 613 return -ENODEV; 625 - 626 - if (!xen_feature(XENFEAT_auto_translated_physmap)) { 627 - register_cpu_notifier(&balloon_cpu_notifier); 628 - 629 - get_online_cpus(); 630 - for_each_online_cpu(cpu) { 631 - if (alloc_balloon_scratch_page(cpu)) { 632 - put_online_cpus(); 633 - unregister_cpu_notifier(&balloon_cpu_notifier); 634 - return -ENOMEM; 635 - } 636 - } 637 - put_online_cpus(); 638 - } 639 614 640 615 pr_info("Initialising balloon driver\n"); 641 616 ··· 624 695 } 625 696 626 697 subsys_initcall(balloon_init); 627 - 628 - static int __init balloon_clear(void) 629 - { 630 - int cpu; 631 - 632 - for_each_possible_cpu(cpu) 633 - per_cpu(balloon_scratch_page, cpu) = NULL; 634 - 635 - return 0; 636 - } 637 - early_initcall(balloon_clear); 638 698 639 699 MODULE_LICENSE("GPL");
+111 -32
drivers/xen/gntdev.c
··· 67 67 * Only populated if populate_freeable_maps == 1 */ 68 68 struct list_head freeable_maps; 69 69 /* lock protects maps and freeable_maps */ 70 - spinlock_t lock; 70 + struct mutex lock; 71 71 struct mm_struct *mm; 72 72 struct mmu_notifier mn; 73 73 }; ··· 91 91 struct gnttab_map_grant_ref *map_ops; 92 92 struct gnttab_unmap_grant_ref *unmap_ops; 93 93 struct gnttab_map_grant_ref *kmap_ops; 94 + struct gnttab_unmap_grant_ref *kunmap_ops; 94 95 struct page **pages; 96 + unsigned long pages_vm_start; 95 97 }; 96 98 97 99 static int unmap_grant_pages(struct grant_map *map, int offset, int pages); ··· 120 118 return; 121 119 122 120 if (map->pages) 123 - free_xenballooned_pages(map->count, map->pages); 121 + gnttab_free_pages(map->count, map->pages); 124 122 kfree(map->pages); 125 123 kfree(map->grants); 126 124 kfree(map->map_ops); 127 125 kfree(map->unmap_ops); 128 126 kfree(map->kmap_ops); 127 + kfree(map->kunmap_ops); 129 128 kfree(map); 130 129 } 131 130 ··· 143 140 add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); 144 141 add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); 145 142 add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); 143 + add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); 146 144 add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); 147 145 if (NULL == add->grants || 148 146 NULL == add->map_ops || 149 147 NULL == add->unmap_ops || 150 148 NULL == add->kmap_ops || 149 + NULL == add->kunmap_ops || 151 150 NULL == add->pages) 152 151 goto err; 153 152 154 - if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) 153 + if (gnttab_alloc_pages(count, add->pages)) 155 154 goto err; 156 155 157 156 for (i = 0; i < count; i++) { 158 157 add->map_ops[i].handle = -1; 159 158 add->unmap_ops[i].handle = -1; 160 159 add->kmap_ops[i].handle = -1; 160 + add->kunmap_ops[i].handle = -1; 161 161 } 162 162 163 163 add->index = 0; ··· 222 216 } 223 217 224 
218 if (populate_freeable_maps && priv) { 225 - spin_lock(&priv->lock); 219 + mutex_lock(&priv->lock); 226 220 list_del(&map->next); 227 - spin_unlock(&priv->lock); 221 + mutex_unlock(&priv->lock); 228 222 } 229 223 230 224 if (map->pages && !use_ptemod) ··· 245 239 BUG_ON(pgnr >= map->count); 246 240 pte_maddr = arbitrary_virt_to_machine(pte).maddr; 247 241 242 + /* 243 + * Set the PTE as special to force get_user_pages_fast() fall 244 + * back to the slow path. If this is not supported as part of 245 + * the grant map, it will be done afterwards. 246 + */ 247 + if (xen_feature(XENFEAT_gnttab_map_avail_bits)) 248 + flags |= (1 << _GNTMAP_guest_avail0); 249 + 248 250 gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, 249 251 map->grants[pgnr].ref, 250 252 map->grants[pgnr].domid); ··· 260 246 -1 /* handle */); 261 247 return 0; 262 248 } 249 + 250 + #ifdef CONFIG_X86 251 + static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token, 252 + unsigned long addr, void *data) 253 + { 254 + set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); 255 + return 0; 256 + } 257 + #endif 263 258 264 259 static int map_grant_pages(struct grant_map *map) 265 260 { ··· 303 280 map->flags | GNTMAP_host_map, 304 281 map->grants[i].ref, 305 282 map->grants[i].domid); 283 + gnttab_set_unmap_op(&map->kunmap_ops[i], address, 284 + map->flags | GNTMAP_host_map, -1); 306 285 } 307 286 } 308 287 ··· 315 290 return err; 316 291 317 292 for (i = 0; i < map->count; i++) { 318 - if (map->map_ops[i].status) 293 + if (map->map_ops[i].status) { 319 294 err = -EINVAL; 320 - else { 321 - BUG_ON(map->map_ops[i].handle == -1); 322 - map->unmap_ops[i].handle = map->map_ops[i].handle; 323 - pr_debug("map handle=%d\n", map->map_ops[i].handle); 295 + continue; 324 296 } 297 + 298 + map->unmap_ops[i].handle = map->map_ops[i].handle; 299 + if (use_ptemod) 300 + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; 325 301 } 326 302 return err; 303 + } 304 + 305 + struct 
unmap_grant_pages_callback_data 306 + { 307 + struct completion completion; 308 + int result; 309 + }; 310 + 311 + static void unmap_grant_callback(int result, 312 + struct gntab_unmap_queue_data *data) 313 + { 314 + struct unmap_grant_pages_callback_data* d = data->data; 315 + 316 + d->result = result; 317 + complete(&d->completion); 327 318 } 328 319 329 320 static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) 330 321 { 331 322 int i, err = 0; 323 + struct gntab_unmap_queue_data unmap_data; 324 + struct unmap_grant_pages_callback_data data; 325 + 326 + init_completion(&data.completion); 327 + unmap_data.data = &data; 328 + unmap_data.done= &unmap_grant_callback; 332 329 333 330 if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { 334 331 int pgno = (map->notify.addr >> PAGE_SHIFT); ··· 362 315 } 363 316 } 364 317 365 - err = gnttab_unmap_refs(map->unmap_ops + offset, 366 - use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, 367 - pages); 368 - if (err) 369 - return err; 318 + unmap_data.unmap_ops = map->unmap_ops + offset; 319 + unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; 320 + unmap_data.pages = map->pages + offset; 321 + unmap_data.count = pages; 322 + 323 + gnttab_unmap_refs_async(&unmap_data); 324 + 325 + wait_for_completion(&data.completion); 326 + if (data.result) 327 + return data.result; 370 328 371 329 for (i = 0; i < pages; i++) { 372 330 if (map->unmap_ops[offset+i].status) ··· 439 387 * not do any unmapping, since that has been done prior to 440 388 * closing the vma, but it may still iterate the unmap_ops list. 
441 389 */ 442 - spin_lock(&priv->lock); 390 + mutex_lock(&priv->lock); 443 391 map->vma = NULL; 444 - spin_unlock(&priv->lock); 392 + mutex_unlock(&priv->lock); 445 393 } 446 394 vma->vm_private_data = NULL; 447 395 gntdev_put_map(priv, map); 448 396 } 449 397 398 + static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, 399 + unsigned long addr) 400 + { 401 + struct grant_map *map = vma->vm_private_data; 402 + 403 + return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; 404 + } 405 + 450 406 static struct vm_operations_struct gntdev_vmops = { 451 407 .open = gntdev_vma_open, 452 408 .close = gntdev_vma_close, 409 + .find_special_page = gntdev_vma_find_special_page, 453 410 }; 454 411 455 412 /* ------------------------------------------------------------------ */ ··· 494 433 struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); 495 434 struct grant_map *map; 496 435 497 - spin_lock(&priv->lock); 436 + mutex_lock(&priv->lock); 498 437 list_for_each_entry(map, &priv->maps, next) { 499 438 unmap_if_in_range(map, start, end); 500 439 } 501 440 list_for_each_entry(map, &priv->freeable_maps, next) { 502 441 unmap_if_in_range(map, start, end); 503 442 } 504 - spin_unlock(&priv->lock); 443 + mutex_unlock(&priv->lock); 505 444 } 506 445 507 446 static void mn_invl_page(struct mmu_notifier *mn, ··· 518 457 struct grant_map *map; 519 458 int err; 520 459 521 - spin_lock(&priv->lock); 460 + mutex_lock(&priv->lock); 522 461 list_for_each_entry(map, &priv->maps, next) { 523 462 if (!map->vma) 524 463 continue; ··· 537 476 err = unmap_grant_pages(map, /* offset */ 0, map->count); 538 477 WARN_ON(err); 539 478 } 540 - spin_unlock(&priv->lock); 479 + mutex_unlock(&priv->lock); 541 480 } 542 481 543 482 static struct mmu_notifier_ops gntdev_mmu_ops = { ··· 559 498 560 499 INIT_LIST_HEAD(&priv->maps); 561 500 INIT_LIST_HEAD(&priv->freeable_maps); 562 - spin_lock_init(&priv->lock); 501 + mutex_init(&priv->lock); 563 502 564 503 if 
(use_ptemod) { 565 504 priv->mm = get_task_mm(current); ··· 633 572 return -EFAULT; 634 573 } 635 574 636 - spin_lock(&priv->lock); 575 + mutex_lock(&priv->lock); 637 576 gntdev_add_map(priv, map); 638 577 op.index = map->index << PAGE_SHIFT; 639 - spin_unlock(&priv->lock); 578 + mutex_unlock(&priv->lock); 640 579 641 580 if (copy_to_user(u, &op, sizeof(op)) != 0) 642 581 return -EFAULT; ··· 655 594 return -EFAULT; 656 595 pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); 657 596 658 - spin_lock(&priv->lock); 597 + mutex_lock(&priv->lock); 659 598 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 660 599 if (map) { 661 600 list_del(&map->next); ··· 663 602 list_add_tail(&map->next, &priv->freeable_maps); 664 603 err = 0; 665 604 } 666 - spin_unlock(&priv->lock); 605 + mutex_unlock(&priv->lock); 667 606 if (map) 668 607 gntdev_put_map(priv, map); 669 608 return err; ··· 731 670 out_flags = op.action; 732 671 out_event = op.event_channel_port; 733 672 734 - spin_lock(&priv->lock); 673 + mutex_lock(&priv->lock); 735 674 736 675 list_for_each_entry(map, &priv->maps, next) { 737 676 uint64_t begin = map->index << PAGE_SHIFT; ··· 759 698 rc = 0; 760 699 761 700 unlock_out: 762 - spin_unlock(&priv->lock); 701 + mutex_unlock(&priv->lock); 763 702 764 703 /* Drop the reference to the event channel we did not save in the map */ 765 704 if (out_flags & UNMAP_NOTIFY_SEND_EVENT) ··· 809 748 pr_debug("map %d+%d at %lx (pgoff %lx)\n", 810 749 index, count, vma->vm_start, vma->vm_pgoff); 811 750 812 - spin_lock(&priv->lock); 751 + mutex_lock(&priv->lock); 813 752 map = gntdev_find_map_index(priv, index, count); 814 753 if (!map) 815 754 goto unlock_out; ··· 844 783 map->flags |= GNTMAP_readonly; 845 784 } 846 785 847 - spin_unlock(&priv->lock); 786 + mutex_unlock(&priv->lock); 848 787 849 788 if (use_ptemod) { 850 789 err = apply_to_page_range(vma->vm_mm, vma->vm_start, ··· 867 806 if (err) 868 807 goto out_put_map; 869 808 } 809 + } else { 
810 + #ifdef CONFIG_X86 811 + /* 812 + * If the PTEs were not made special by the grant map 813 + * hypercall, do so here. 814 + * 815 + * This is racy since the mapping is already visible 816 + * to userspace but userspace should be well-behaved 817 + * enough to not touch it until the mmap() call 818 + * returns. 819 + */ 820 + if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { 821 + apply_to_page_range(vma->vm_mm, vma->vm_start, 822 + vma->vm_end - vma->vm_start, 823 + set_grant_ptes_as_special, NULL); 824 + } 825 + #endif 826 + map->pages_vm_start = vma->vm_start; 870 827 } 871 828 872 829 return 0; 873 830 874 831 unlock_out: 875 - spin_unlock(&priv->lock); 832 + mutex_unlock(&priv->lock); 876 833 return err; 877 834 878 835 out_unlock_put: 879 - spin_unlock(&priv->lock); 836 + mutex_unlock(&priv->lock); 880 837 out_put_map: 881 838 if (use_ptemod) 882 839 map->vma = NULL;
+116 -4
drivers/xen/grant-table.c
··· 42 42 #include <linux/io.h> 43 43 #include <linux/delay.h> 44 44 #include <linux/hardirq.h> 45 + #include <linux/workqueue.h> 45 46 46 47 #include <xen/xen.h> 47 48 #include <xen/interface/xen.h> ··· 51 50 #include <xen/interface/memory.h> 52 51 #include <xen/hvc-console.h> 53 52 #include <xen/swiotlb-xen.h> 53 + #include <xen/balloon.h> 54 54 #include <asm/xen/hypercall.h> 55 55 #include <asm/xen/interface.h> 56 56 ··· 673 671 } 674 672 EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); 675 673 674 + /** 675 + * gnttab_alloc_pages - alloc pages suitable for grant mapping into 676 + * @nr_pages: number of pages to alloc 677 + * @pages: returns the pages 678 + */ 679 + int gnttab_alloc_pages(int nr_pages, struct page **pages) 680 + { 681 + int i; 682 + int ret; 683 + 684 + ret = alloc_xenballooned_pages(nr_pages, pages, false); 685 + if (ret < 0) 686 + return ret; 687 + 688 + for (i = 0; i < nr_pages; i++) { 689 + #if BITS_PER_LONG < 64 690 + struct xen_page_foreign *foreign; 691 + 692 + foreign = kzalloc(sizeof(*foreign), GFP_KERNEL); 693 + if (!foreign) { 694 + gnttab_free_pages(nr_pages, pages); 695 + return -ENOMEM; 696 + } 697 + set_page_private(pages[i], (unsigned long)foreign); 698 + #endif 699 + SetPagePrivate(pages[i]); 700 + } 701 + 702 + return 0; 703 + } 704 + EXPORT_SYMBOL(gnttab_alloc_pages); 705 + 706 + /** 707 + * gnttab_free_pages - free pages allocated by gnttab_alloc_pages() 708 + * @nr_pages; number of pages to free 709 + * @pages: the pages 710 + */ 711 + void gnttab_free_pages(int nr_pages, struct page **pages) 712 + { 713 + int i; 714 + 715 + for (i = 0; i < nr_pages; i++) { 716 + if (PagePrivate(pages[i])) { 717 + #if BITS_PER_LONG < 64 718 + kfree((void *)page_private(pages[i])); 719 + #endif 720 + ClearPagePrivate(pages[i]); 721 + } 722 + } 723 + free_xenballooned_pages(nr_pages, pages); 724 + } 725 + EXPORT_SYMBOL(gnttab_free_pages); 726 + 676 727 /* Handling of paged out grant targets (GNTST_eagain) */ 677 728 #define MAX_DELAY 256 678 
729 static inline void ··· 782 727 if (ret) 783 728 return ret; 784 729 785 - /* Retry eagain maps */ 786 - for (i = 0; i < count; i++) 730 + for (i = 0; i < count; i++) { 731 + /* Retry eagain maps */ 787 732 if (map_ops[i].status == GNTST_eagain) 788 733 gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, 789 734 &map_ops[i].status, __func__); 735 + 736 + if (map_ops[i].status == GNTST_okay) { 737 + struct xen_page_foreign *foreign; 738 + 739 + SetPageForeign(pages[i]); 740 + foreign = xen_page_foreign(pages[i]); 741 + foreign->domid = map_ops[i].dom; 742 + foreign->gref = map_ops[i].ref; 743 + } 744 + } 790 745 791 746 return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count); 792 747 } 793 748 EXPORT_SYMBOL_GPL(gnttab_map_refs); 794 749 795 750 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, 796 - struct gnttab_map_grant_ref *kmap_ops, 751 + struct gnttab_unmap_grant_ref *kunmap_ops, 797 752 struct page **pages, unsigned int count) 798 753 { 754 + unsigned int i; 799 755 int ret; 800 756 801 757 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); 802 758 if (ret) 803 759 return ret; 804 760 805 - return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count); 761 + for (i = 0; i < count; i++) 762 + ClearPageForeign(pages[i]); 763 + 764 + return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count); 806 765 } 807 766 EXPORT_SYMBOL_GPL(gnttab_unmap_refs); 767 + 768 + #define GNTTAB_UNMAP_REFS_DELAY 5 769 + 770 + static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); 771 + 772 + static void gnttab_unmap_work(struct work_struct *work) 773 + { 774 + struct gntab_unmap_queue_data 775 + *unmap_data = container_of(work, 776 + struct gntab_unmap_queue_data, 777 + gnttab_work.work); 778 + if (unmap_data->age != UINT_MAX) 779 + unmap_data->age++; 780 + __gnttab_unmap_refs_async(unmap_data); 781 + } 782 + 783 + static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* 
item) 784 + { 785 + int ret; 786 + int pc; 787 + 788 + for (pc = 0; pc < item->count; pc++) { 789 + if (page_count(item->pages[pc]) > 1) { 790 + unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1); 791 + schedule_delayed_work(&item->gnttab_work, 792 + msecs_to_jiffies(delay)); 793 + return; 794 + } 795 + } 796 + 797 + ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops, 798 + item->pages, item->count); 799 + item->done(ret, item); 800 + } 801 + 802 + void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) 803 + { 804 + INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work); 805 + item->age = 0; 806 + 807 + __gnttab_unmap_refs_async(item); 808 + } 809 + EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async); 808 810 809 811 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) 810 812 {
+7 -1
drivers/xen/manage.c
··· 105 105 106 106 err = freeze_processes(); 107 107 if (err) { 108 - pr_err("%s: freeze failed %d\n", __func__, err); 108 + pr_err("%s: freeze processes failed %d\n", __func__, err); 109 109 goto out; 110 + } 111 + 112 + err = freeze_kernel_threads(); 113 + if (err) { 114 + pr_err("%s: freeze kernel threads failed %d\n", __func__, err); 115 + goto out_thaw; 110 116 } 111 117 112 118 err = dpm_suspend_start(PMSG_FREEZE);
+1 -1
drivers/xen/tmem.c
··· 374 374 }; 375 375 #endif 376 376 377 - static int xen_tmem_init(void) 377 + static int __init xen_tmem_init(void) 378 378 { 379 379 if (!xen_domain()) 380 380 return 0;
+3 -3
drivers/xen/xen-scsiback.c
··· 227 227 return; 228 228 if (i > scsiback_max_buffer_pages) { 229 229 n = min(num, i - scsiback_max_buffer_pages); 230 - free_xenballooned_pages(n, page + num - n); 230 + gnttab_free_pages(n, page + num - n); 231 231 n = num - n; 232 232 } 233 233 spin_lock_irqsave(&free_pages_lock, flags); ··· 244 244 spin_lock_irqsave(&free_pages_lock, flags); 245 245 if (list_empty(&scsiback_free_pages)) { 246 246 spin_unlock_irqrestore(&free_pages_lock, flags); 247 - return alloc_xenballooned_pages(1, page, false); 247 + return gnttab_alloc_pages(1, page); 248 248 } 249 249 page[0] = list_first_entry(&scsiback_free_pages, struct page, lru); 250 250 list_del(&page[0]->lru); ··· 2106 2106 while (free_pages_num) { 2107 2107 if (get_free_page(&page)) 2108 2108 BUG(); 2109 - free_xenballooned_pages(1, &page); 2109 + gnttab_free_pages(1, &page); 2110 2110 } 2111 2111 scsiback_deregister_configfs(); 2112 2112 xenbus_unregister_driver(&scsiback_driver);
+7 -4
drivers/xen/xenbus/xenbus_dev_frontend.c
··· 326 326 } 327 327 328 328 if (msg_type == XS_TRANSACTION_START) { 329 - trans->handle.id = simple_strtoul(reply, NULL, 0); 330 - 331 - list_add(&trans->list, &u->transactions); 332 - } else if (msg_type == XS_TRANSACTION_END) { 329 + if (u->u.msg.type == XS_ERROR) 330 + kfree(trans); 331 + else { 332 + trans->handle.id = simple_strtoul(reply, NULL, 0); 333 + list_add(&trans->list, &u->transactions); 334 + } 335 + } else if (u->u.msg.type == XS_TRANSACTION_END) { 333 336 list_for_each_entry(trans, &u->transactions, list) 334 337 if (trans->handle.id == u->u.msg.tx_id) 335 338 break;
+8
include/linux/mm.h
··· 290 290 /* called by sys_remap_file_pages() to populate non-linear mapping */ 291 291 int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, 292 292 unsigned long size, pgoff_t pgoff); 293 + 294 + /* 295 + * Called by vm_normal_page() for special PTEs to find the 296 + * page for @addr. This is useful if the default behavior 297 + * (using pte_page()) would not find the correct page. 298 + */ 299 + struct page *(*find_special_page)(struct vm_area_struct *vma, 300 + unsigned long addr); 293 301 }; 294 302 295 303 struct mmu_gather;
+5
include/linux/page-flags.h
··· 121 121 PG_fscache = PG_private_2, /* page backed by cache */ 122 122 123 123 /* XEN */ 124 + /* Pinned in Xen as a read-only pagetable page. */ 124 125 PG_pinned = PG_owner_priv_1, 126 + /* Pinned as part of domain save (see xen_mm_pin_all()). */ 125 127 PG_savepinned = PG_dirty, 128 + /* Has a grant mapping of another (foreign) domain's page. */ 129 + PG_foreign = PG_owner_priv_1, 126 130 127 131 /* SLOB */ 128 132 PG_slob_free = PG_private, ··· 219 215 PAGEFLAG(Checked, checked) /* Used by some filesystems */ 220 216 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ 221 217 PAGEFLAG(SavePinned, savepinned); /* Xen */ 218 + PAGEFLAG(Foreign, foreign); /* Xen */ 222 219 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) 223 220 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) 224 221 __SETPAGEFLAG(SwapBacked, swapbacked)
+42 -1
include/xen/grant_table.h
··· 45 45 #include <asm/xen/hypervisor.h> 46 46 47 47 #include <xen/features.h> 48 + #include <linux/mm_types.h> 49 + #include <linux/page-flags.h> 48 50 49 51 #define GNTTAB_RESERVED_XENSTORE 1 50 52 ··· 58 56 void (*fn)(void *); 59 57 void *arg; 60 58 u16 count; 59 + }; 60 + 61 + struct gntab_unmap_queue_data; 62 + 63 + typedef void (*gnttab_unmap_refs_done)(int result, struct gntab_unmap_queue_data *data); 64 + 65 + struct gntab_unmap_queue_data 66 + { 67 + struct delayed_work gnttab_work; 68 + void *data; 69 + gnttab_unmap_refs_done done; 70 + struct gnttab_unmap_grant_ref *unmap_ops; 71 + struct gnttab_unmap_grant_ref *kunmap_ops; 72 + struct page **pages; 73 + unsigned int count; 74 + unsigned int age; 61 75 }; 62 76 63 77 int gnttab_init(void); ··· 181 163 182 164 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) 183 165 166 + int gnttab_alloc_pages(int nr_pages, struct page **pages); 167 + void gnttab_free_pages(int nr_pages, struct page **pages); 168 + 184 169 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, 185 170 struct gnttab_map_grant_ref *kmap_ops, 186 171 struct page **pages, unsigned int count); 187 172 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, 188 - struct gnttab_map_grant_ref *kunmap_ops, 173 + struct gnttab_unmap_grant_ref *kunmap_ops, 189 174 struct page **pages, unsigned int count); 175 + void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); 176 + 190 177 191 178 /* Perform a batch of grant map/copy operations. Retry every batch slot 192 179 * for which the hypervisor returns GNTST_eagain. 
This is typically due ··· 204 181 */ 205 182 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count); 206 183 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count); 184 + 185 + 186 + struct xen_page_foreign { 187 + domid_t domid; 188 + grant_ref_t gref; 189 + }; 190 + 191 + static inline struct xen_page_foreign *xen_page_foreign(struct page *page) 192 + { 193 + if (!PageForeign(page)) 194 + return NULL; 195 + #if BITS_PER_LONG < 64 196 + return (struct xen_page_foreign *)page->private; 197 + #else 198 + BUILD_BUG_ON(sizeof(struct xen_page_foreign) > BITS_PER_LONG); 199 + return (struct xen_page_foreign *)&page->private; 200 + #endif 201 + } 207 202 208 203 #endif /* __ASM_GNTTAB_H__ */
+6
include/xen/interface/features.h
··· 41 41 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ 42 42 #define XENFEAT_mmu_pt_update_preserve_ad 5 43 43 44 + /* 45 + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel 46 + * available pte bits. 47 + */ 48 + #define XENFEAT_gnttab_map_avail_bits 7 49 + 44 50 /* x86: Does this Xen host support the HVM callback vector type? */ 45 51 #define XENFEAT_hvm_callback_vector 8 46 52
+7
include/xen/interface/grant_table.h
··· 526 526 #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) 527 527 528 528 /* 529 + * Bits to be placed in guest kernel available PTE bits (architecture 530 + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). 531 + */ 532 + #define _GNTMAP_guest_avail0 (16) 533 + #define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) 534 + 535 + /* 529 536 * Values for error status returns. All errors are -ve. 530 537 */ 531 538 #define GNTST_okay (0) /* Normal return. */
+2
mm/memory.c
··· 754 754 if (HAVE_PTE_SPECIAL) { 755 755 if (likely(!pte_special(pte))) 756 756 goto check_pfn; 757 + if (vma->vm_ops && vma->vm_ops->find_special_page) 758 + return vma->vm_ops->find_special_page(vma, addr); 757 759 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) 758 760 return NULL; 759 761 if (!is_zero_pfn(pfn))