Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull Xen update from Konrad Rzeszutek Wilk:
"Features:
- When hotplugging PCI devices in a PV guest we can allocate
Xen-SWIOTLB later.
- Cleanup Xen SWIOTLB.
- Support pages out grants from HVM domains in the backends.
- Support wild cards in xen-pciback.hide=(BDF) arguments.
- Update grant status updates with upstream hypervisor.
- Boot PV guests with more than 128GB.
- Cleanup Xen MMU code/add comments.
- Obtain XENVERS using a preferred method.
- Lay out generic changes to support Xen ARM.
- Allow privcmd ioctl for HVM (used to do only PV).
- Do v2 of mmap_batch for privcmd ioctls.
- If hypervisor saves the LED keyboard light - we will now instruct
the kernel about its state.
Fixes:
- More fixes to Xen PCI backend for various calls/FLR/etc.
- With more than 4GB in a 64-bit PV guest disable native SWIOTLB.
- Fix up smatch warnings.
- Fix up various return values in privcmd and mm."

* tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (48 commits)
xen/pciback: Restore the PCI config space after an FLR.
xen-pciback: properly clean up after calling pcistub_device_find()
xen/vga: add the xen EFI video mode support
xen/x86: retrieve keyboard shift status flags from hypervisor.
xen/gndev: Xen backend support for paged out grant targets V4.
xen-pciback: support wild cards in slot specifications
xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer.
xen/swiotlb: Remove functions not needed anymore.
xen/pcifront: Use Xen-SWIOTLB when initting if required.
xen/swiotlb: For early initialization, return zero on success.
xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used.
xen/swiotlb: Move the error strings to its own function.
xen/swiotlb: Move the nr_tbl determination in its own function.
xen/arm: compile and run xenbus
xen: resynchronise grant table status codes with upstream
xen/privcmd: return -EFAULT on error
xen/privcmd: Fix mmap batch ioctl error status copy back.
xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
xen/mm: return more precise error from xen_remap_domain_range()
xen/mmu: If the revector fails, don't attempt to revector anything else.
...

+915 -248
+6 -1
arch/ia64/include/asm/xen/interface.h
··· 67 67 #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) 68 68 69 69 #ifndef __ASSEMBLY__ 70 + /* Explicitly size integers that represent pfns in the public interface 71 + * with Xen so that we could have one ABI that works for 32 and 64 bit 72 + * guests. */ 73 + typedef unsigned long xen_pfn_t; 70 74 /* Guest handles for primitive C types. */ 71 75 __DEFINE_GUEST_HANDLE(uchar, unsigned char); 72 76 __DEFINE_GUEST_HANDLE(uint, unsigned int); ··· 83 79 DEFINE_GUEST_HANDLE(uint64_t); 84 80 DEFINE_GUEST_HANDLE(uint32_t); 85 81 86 - typedef unsigned long xen_pfn_t; 87 82 DEFINE_GUEST_HANDLE(xen_pfn_t); 88 83 #define PRI_xen_pfn "lx" 89 84 #endif ··· 267 264 typedef struct xen_callback xen_callback_t; 268 265 269 266 #endif /* !__ASSEMBLY__ */ 267 + 268 + #include <asm/pvclock-abi.h> 270 269 271 270 /* Size of the shared_info area (this is not related to page size). */ 272 271 #define XSI_SHIFT 14
+7
arch/x86/include/asm/xen/interface.h
··· 47 47 #endif 48 48 49 49 #ifndef __ASSEMBLY__ 50 + /* Explicitly size integers that represent pfns in the public interface 51 + * with Xen so that on ARM we can have one ABI that works for 32 and 64 52 + * bit guests. */ 53 + typedef unsigned long xen_pfn_t; 50 54 /* Guest handles for primitive C types. */ 51 55 __DEFINE_GUEST_HANDLE(uchar, unsigned char); 52 56 __DEFINE_GUEST_HANDLE(uint, unsigned int); ··· 61 57 DEFINE_GUEST_HANDLE(void); 62 58 DEFINE_GUEST_HANDLE(uint64_t); 63 59 DEFINE_GUEST_HANDLE(uint32_t); 60 + DEFINE_GUEST_HANDLE(xen_pfn_t); 64 61 #endif 65 62 66 63 #ifndef HYPERVISOR_VIRT_START ··· 125 120 #else 126 121 #include "interface_64.h" 127 122 #endif 123 + 124 + #include <asm/pvclock-abi.h> 128 125 129 126 #ifndef __ASSEMBLY__ 130 127 /*
+2
arch/x86/include/asm/xen/swiotlb-xen.h
··· 5 5 extern int xen_swiotlb; 6 6 extern int __init pci_xen_swiotlb_detect(void); 7 7 extern void __init pci_xen_swiotlb_init(void); 8 + extern int pci_xen_swiotlb_init_late(void); 8 9 #else 9 10 #define xen_swiotlb (0) 10 11 static inline int __init pci_xen_swiotlb_detect(void) { return 0; } 11 12 static inline void __init pci_xen_swiotlb_init(void) { } 13 + static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } 12 14 #endif 13 15 14 16 #endif /* _ASM_X86_SWIOTLB_XEN_H */
+2 -1
arch/x86/xen/apic.c
··· 6 6 7 7 #include <xen/xen.h> 8 8 #include <xen/interface/physdev.h> 9 + #include "xen-ops.h" 9 10 10 - unsigned int xen_io_apic_read(unsigned apic, unsigned reg) 11 + static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) 11 12 { 12 13 struct physdev_apic apic_op; 13 14 int ret;
+11 -4
arch/x86/xen/enlighten.c
··· 80 80 #include "smp.h" 81 81 #include "multicalls.h" 82 82 83 + #include <xen/events.h> 84 + 83 85 EXPORT_SYMBOL_GPL(hypercall_page); 84 86 85 87 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); ··· 1290 1288 { 1291 1289 struct physdev_set_iopl set_iopl; 1292 1290 int rc; 1293 - pgd_t *pgd; 1294 1291 1295 1292 if (!xen_start_info) 1296 1293 return; ··· 1381 1380 acpi_numa = -1; 1382 1381 #endif 1383 1382 1384 - pgd = (pgd_t *)xen_start_info->pt_base; 1385 - 1386 1383 /* Don't do the full vcpu_info placement stuff until we have a 1387 1384 possible map and a non-dummy shared_info. */ 1388 1385 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; ··· 1389 1390 early_boot_irqs_disabled = true; 1390 1391 1391 1392 xen_raw_console_write("mapping kernel into physical memory\n"); 1392 - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1393 + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); 1393 1394 1394 1395 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1395 1396 xen_build_mfn_list_list(); ··· 1440 1441 const struct dom0_vga_console_info *info = 1441 1442 (void *)((char *)xen_start_info + 1442 1443 xen_start_info->console.dom0.info_off); 1444 + struct xen_platform_op op = { 1445 + .cmd = XENPF_firmware_info, 1446 + .interface_version = XENPF_INTERFACE_VERSION, 1447 + .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, 1448 + }; 1443 1449 1444 1450 xen_init_vga(info, xen_start_info->console.dom0.info_size); 1445 1451 xen_start_info->console.domU.mfn = 0; 1446 1452 xen_start_info->console.domU.evtchn = 0; 1453 + 1454 + if (HYPERVISOR_dom0_op(&op) == 0) 1455 + boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; 1447 1456 1448 1457 xen_init_apic(); 1449 1458
+152 -36
arch/x86/xen/mmu.c
··· 84 84 */ 85 85 DEFINE_SPINLOCK(xen_reservation_lock); 86 86 87 + #ifdef CONFIG_X86_32 87 88 /* 88 89 * Identity map, in addition to plain kernel map. This needs to be 89 90 * large enough to allocate page table pages to allocate the rest. ··· 92 91 */ 93 92 #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) 94 93 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); 95 - 94 + #endif 96 95 #ifdef CONFIG_X86_64 97 96 /* l3 pud for userspace vsyscall mapping */ 98 97 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; ··· 1177 1176 1178 1177 static void xen_post_allocator_init(void); 1179 1178 1180 - static void __init xen_pagetable_init(void) 1181 - { 1182 - paging_init(); 1183 - xen_setup_shared_info(); 1184 - xen_post_allocator_init(); 1185 - } 1186 - 1187 1179 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) 1188 1180 { 1189 1181 /* reserve the range used */ ··· 1191 1197 } 1192 1198 } 1193 1199 1200 + #ifdef CONFIG_X86_64 1201 + static void __init xen_cleanhighmap(unsigned long vaddr, 1202 + unsigned long vaddr_end) 1203 + { 1204 + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; 1205 + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); 1206 + 1207 + /* NOTE: The loop is more greedy than the cleanup_highmap variant. 1208 + * We include the PMD passed in on _both_ boundaries. */ 1209 + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); 1210 + pmd++, vaddr += PMD_SIZE) { 1211 + if (pmd_none(*pmd)) 1212 + continue; 1213 + if (vaddr < (unsigned long) _text || vaddr > kernel_end) 1214 + set_pmd(pmd, __pmd(0)); 1215 + } 1216 + /* In case we did something silly, we should crash in this function 1217 + * instead of somewhere later and be confusing. 
*/ 1218 + xen_mc_flush(); 1219 + } 1220 + #endif 1221 + static void __init xen_pagetable_init(void) 1222 + { 1223 + #ifdef CONFIG_X86_64 1224 + unsigned long size; 1225 + unsigned long addr; 1226 + #endif 1227 + paging_init(); 1228 + xen_setup_shared_info(); 1229 + #ifdef CONFIG_X86_64 1230 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { 1231 + unsigned long new_mfn_list; 1232 + 1233 + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1234 + 1235 + /* On 32-bit, we get zero so this never gets executed. */ 1236 + new_mfn_list = xen_revector_p2m_tree(); 1237 + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { 1238 + /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1239 + memset((void *)xen_start_info->mfn_list, 0xff, size); 1240 + 1241 + /* We should be in __ka space. */ 1242 + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); 1243 + addr = xen_start_info->mfn_list; 1244 + /* We roundup to the PMD, which means that if anybody at this stage is 1245 + * using the __ka address of xen_start_info or xen_start_info->shared_info 1246 + * they are in going to crash. Fortunatly we have already revectored 1247 + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ 1248 + size = roundup(size, PMD_SIZE); 1249 + xen_cleanhighmap(addr, addr + size); 1250 + 1251 + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1252 + memblock_free(__pa(xen_start_info->mfn_list), size); 1253 + /* And revector! Bye bye old array */ 1254 + xen_start_info->mfn_list = new_mfn_list; 1255 + } else 1256 + goto skip; 1257 + } 1258 + /* At this stage, cleanup_highmap has already cleaned __ka space 1259 + * from _brk_limit way up to the max_pfn_mapped (which is the end of 1260 + * the ramdisk). We continue on, erasing PMD entries that point to page 1261 + * tables - do note that they are accessible at this stage via __va. 
1262 + * For good measure we also round up to the PMD - which means that if 1263 + * anybody is using __ka address to the initial boot-stack - and try 1264 + * to use it - they are going to crash. The xen_start_info has been 1265 + * taken care of already in xen_setup_kernel_pagetable. */ 1266 + addr = xen_start_info->pt_base; 1267 + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); 1268 + 1269 + xen_cleanhighmap(addr, addr + size); 1270 + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); 1271 + #ifdef DEBUG 1272 + /* This is superflous and is not neccessary, but you know what 1273 + * lets do it. The MODULES_VADDR -> MODULES_END should be clear of 1274 + * anything at this stage. */ 1275 + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); 1276 + #endif 1277 + skip: 1278 + #endif 1279 + xen_post_allocator_init(); 1280 + } 1194 1281 static void xen_write_cr2(unsigned long cr2) 1195 1282 { 1196 1283 this_cpu_read(xen_vcpu)->arch.cr2 = cr2; ··· 1727 1652 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) 1728 1653 BUG(); 1729 1654 } 1730 - 1655 + #ifdef CONFIG_X86_32 1731 1656 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1732 1657 { 1733 1658 unsigned pmdidx, pteidx; ··· 1778 1703 1779 1704 set_page_prot(pmd, PAGE_KERNEL_RO); 1780 1705 } 1781 - 1706 + #endif 1782 1707 void __init xen_setup_machphys_mapping(void) 1783 1708 { 1784 1709 struct xen_machphys_mapping mapping; ··· 1806 1731 for (i = 0; i < PTRS_PER_PTE; i++) 1807 1732 pte[i] = xen_make_pte(pte[i].pte); 1808 1733 } 1809 - 1734 + static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, 1735 + unsigned long addr) 1736 + { 1737 + if (*pt_base == PFN_DOWN(__pa(addr))) { 1738 + set_page_prot((void *)addr, PAGE_KERNEL); 1739 + clear_page((void *)addr); 1740 + (*pt_base)++; 1741 + } 1742 + if (*pt_end == PFN_DOWN(__pa(addr))) { 1743 + set_page_prot((void *)addr, PAGE_KERNEL); 1744 + 
clear_page((void *)addr); 1745 + (*pt_end)--; 1746 + } 1747 + } 1810 1748 /* 1811 1749 * Set up the initial kernel pagetable. 1812 1750 * ··· 1831 1743 * of the physical mapping once some sort of allocator has been set 1832 1744 * up. 1833 1745 */ 1834 - pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1835 - unsigned long max_pfn) 1746 + void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1836 1747 { 1837 1748 pud_t *l3; 1838 1749 pmd_t *l2; 1750 + unsigned long addr[3]; 1751 + unsigned long pt_base, pt_end; 1752 + unsigned i; 1839 1753 1840 1754 /* max_pfn_mapped is the last pfn mapped in the initial memory 1841 1755 * mappings. Considering that on Xen after the kernel mappings we ··· 1845 1755 * set max_pfn_mapped to the last real pfn mapped. */ 1846 1756 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); 1847 1757 1758 + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); 1759 + pt_end = pt_base + xen_start_info->nr_pt_frames; 1760 + 1848 1761 /* Zap identity mapping */ 1849 1762 init_level4_pgt[0] = __pgd(0); 1850 1763 1851 1764 /* Pre-constructed entries are in pfn, so convert to mfn */ 1765 + /* L4[272] -> level3_ident_pgt 1766 + * L4[511] -> level3_kernel_pgt */ 1852 1767 convert_pfn_mfn(init_level4_pgt); 1768 + 1769 + /* L3_i[0] -> level2_ident_pgt */ 1853 1770 convert_pfn_mfn(level3_ident_pgt); 1771 + /* L3_k[510] -> level2_kernel_pgt 1772 + * L3_i[511] -> level2_fixmap_pgt */ 1854 1773 convert_pfn_mfn(level3_kernel_pgt); 1855 1774 1775 + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1856 1776 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1857 1777 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1858 1778 1859 - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1860 - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1779 + addr[0] = (unsigned long)pgd; 1780 + addr[1] = (unsigned long)l3; 1781 + addr[2] = (unsigned long)l2; 1782 + /* Graft it onto L4[272][0]. 
Note that we creating an aliasing problem: 1783 + * Both L4[272][0] and L4[511][511] have entries that point to the same 1784 + * L2 (PMD) tables. Meaning that if you modify it in __va space 1785 + * it will be also modified in the __ka space! (But if you just 1786 + * modify the PMD table to point to other PTE's or none, then you 1787 + * are OK - which is what cleanup_highmap does) */ 1788 + copy_page(level2_ident_pgt, l2); 1789 + /* Graft it onto L4[511][511] */ 1790 + copy_page(level2_kernel_pgt, l2); 1861 1791 1792 + /* Get [511][510] and graft that in level2_fixmap_pgt */ 1862 1793 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); 1863 1794 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); 1864 - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1865 - 1866 - /* Set up identity map */ 1867 - xen_map_identity_early(level2_ident_pgt, max_pfn); 1795 + copy_page(level2_fixmap_pgt, l2); 1796 + /* Note that we don't do anything with level1_fixmap_pgt which 1797 + * we don't need. */ 1868 1798 1869 1799 /* Make pagetable pieces RO */ 1870 1800 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1871 1801 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1872 1802 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1873 1803 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1804 + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1874 1805 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1875 1806 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1876 1807 ··· 1902 1791 /* Unpin Xen-provided one */ 1903 1792 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1904 1793 1905 - /* Switch over */ 1906 - pgd = init_level4_pgt; 1907 - 1908 1794 /* 1909 1795 * At this stage there can be no user pgd, and no page 1910 1796 * structure to attach it to, so make sure we just set kernel 1911 1797 * pgd. 
1912 1798 */ 1913 1799 xen_mc_batch(); 1914 - __xen_write_cr3(true, __pa(pgd)); 1800 + __xen_write_cr3(true, __pa(init_level4_pgt)); 1915 1801 xen_mc_issue(PARAVIRT_LAZY_CPU); 1916 1802 1917 - memblock_reserve(__pa(xen_start_info->pt_base), 1918 - xen_start_info->nr_pt_frames * PAGE_SIZE); 1803 + /* We can't that easily rip out L3 and L2, as the Xen pagetables are 1804 + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for 1805 + * the initial domain. For guests using the toolstack, they are in: 1806 + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only 1807 + * rip out the [L4] (pgd), but for guests we shave off three pages. 1808 + */ 1809 + for (i = 0; i < ARRAY_SIZE(addr); i++) 1810 + check_pt_base(&pt_base, &pt_end, addr[i]); 1919 1811 1920 - return pgd; 1812 + /* Our (by three pages) smaller Xen pagetable that we are using */ 1813 + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); 1814 + /* Revector the xen_start_info */ 1815 + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); 1921 1816 } 1922 1817 #else /* !CONFIG_X86_64 */ 1923 1818 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); ··· 1948 1831 */ 1949 1832 swapper_kernel_pmd = 1950 1833 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); 1951 - memcpy(swapper_kernel_pmd, initial_kernel_pmd, 1952 - sizeof(pmd_t) * PTRS_PER_PMD); 1834 + copy_page(swapper_kernel_pmd, initial_kernel_pmd); 1953 1835 swapper_pg_dir[KERNEL_PGD_BOUNDARY] = 1954 1836 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); 1955 1837 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); ··· 1965 1849 pv_mmu_ops.write_cr3 = &xen_write_cr3; 1966 1850 } 1967 1851 1968 - pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1969 - unsigned long max_pfn) 1852 + void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1970 1853 { 1971 1854 pmd_t *kernel_pmd; 1972 1855 ··· 1977 1862 512*1024); 1978 1863 1979 1864 kernel_pmd = 
m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1980 - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1865 + copy_page(initial_kernel_pmd, kernel_pmd); 1981 1866 1982 1867 xen_map_identity_early(initial_kernel_pmd, max_pfn); 1983 1868 1984 - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 1869 + copy_page(initial_page_table, pgd); 1985 1870 initial_page_table[KERNEL_PGD_BOUNDARY] = 1986 1871 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); 1987 1872 ··· 1997 1882 1998 1883 memblock_reserve(__pa(xen_start_info->pt_base), 1999 1884 xen_start_info->nr_pt_frames * PAGE_SIZE); 2000 - 2001 - return initial_page_table; 2002 1885 } 2003 1886 #endif /* CONFIG_X86_64 */ 2004 1887 ··· 2446 2333 unsigned long range; 2447 2334 int err = 0; 2448 2335 2336 + if (xen_feature(XENFEAT_auto_translated_physmap)) 2337 + return -EINVAL; 2338 + 2449 2339 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); 2450 2340 2451 2341 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == ··· 2467 2351 if (err) 2468 2352 goto out; 2469 2353 2470 - err = -EFAULT; 2471 - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) 2354 + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); 2355 + if (err < 0) 2472 2356 goto out; 2473 2357 2474 2358 nr -= batch;
+85 -7
arch/x86/xen/p2m.c
··· 22 22 * 23 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 24 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 25 - * 512 and 1024 entries respectively. 25 + * 512 and 1024 entries respectively. 26 26 * 27 27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN. 28 28 * ··· 139 139 * / | ~0, ~0, .... | 140 140 * | \---------------/ 141 141 * | 142 - * p2m_missing p2m_missing 143 - * /------------------\ /------------\ 144 - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | 145 - * | [p2m_mid_missing]+---->| ..., ~0 | 146 - * \------------------/ \------------/ 142 + * p2m_mid_missing p2m_missing 143 + * /-----------------\ /------------\ 144 + * | [p2m_missing] +---->| ~0, ~0, ~0 | 145 + * | [p2m_missing] +---->| ..., ~0 | 146 + * \-----------------/ \------------/ 147 147 * 148 148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) 149 149 */ ··· 396 396 397 397 m2p_override_init(); 398 398 } 399 + #ifdef CONFIG_X86_64 400 + #include <linux/bootmem.h> 401 + unsigned long __init xen_revector_p2m_tree(void) 402 + { 403 + unsigned long va_start; 404 + unsigned long va_end; 405 + unsigned long pfn; 406 + unsigned long pfn_free = 0; 407 + unsigned long *mfn_list = NULL; 408 + unsigned long size; 399 409 410 + va_start = xen_start_info->mfn_list; 411 + /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), 412 + * so make sure it is rounded up to that */ 413 + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 414 + va_end = va_start + size; 415 + 416 + /* If we were revectored already, don't do it again. 
*/ 417 + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) 418 + return 0; 419 + 420 + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); 421 + if (!mfn_list) { 422 + pr_warn("Could not allocate space for a new P2M tree!\n"); 423 + return xen_start_info->mfn_list; 424 + } 425 + /* Fill it out with INVALID_P2M_ENTRY value */ 426 + memset(mfn_list, 0xFF, size); 427 + 428 + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { 429 + unsigned topidx = p2m_top_index(pfn); 430 + unsigned mididx; 431 + unsigned long *mid_p; 432 + 433 + if (!p2m_top[topidx]) 434 + continue; 435 + 436 + if (p2m_top[topidx] == p2m_mid_missing) 437 + continue; 438 + 439 + mididx = p2m_mid_index(pfn); 440 + mid_p = p2m_top[topidx][mididx]; 441 + if (!mid_p) 442 + continue; 443 + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) 444 + continue; 445 + 446 + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) 447 + continue; 448 + 449 + /* The old va. Rebase it on mfn_list */ 450 + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { 451 + unsigned long *new; 452 + 453 + if (pfn_free > (size / sizeof(unsigned long))) { 454 + WARN(1, "Only allocated for %ld pages, but we want %ld!\n", 455 + size / sizeof(unsigned long), pfn_free); 456 + return 0; 457 + } 458 + new = &mfn_list[pfn_free]; 459 + 460 + copy_page(new, mid_p); 461 + p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 462 + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); 463 + 464 + pfn_free += P2M_PER_PAGE; 465 + 466 + } 467 + /* This should be the leafs allocated for identity from _brk. 
*/ 468 + } 469 + return (unsigned long)mfn_list; 470 + 471 + } 472 + #else 473 + unsigned long __init xen_revector_p2m_tree(void) 474 + { 475 + return 0; 476 + } 477 + #endif 400 478 unsigned long get_phys_to_machine(unsigned long pfn) 401 479 { 402 480 unsigned topidx, mididx, idx; ··· 508 430 free_page((unsigned long)p); 509 431 } 510 432 511 - /* 433 + /* 512 434 * Fully allocate the p2m structure for a given pfn. We need to check 513 435 * that both the top and mid levels are allocated, and make sure the 514 436 * parallel mfn tree is kept in sync. We may race with other cpus, so
+45 -7
arch/x86/xen/pci-swiotlb-xen.c
··· 8 8 #include <xen/xen.h> 9 9 #include <asm/iommu_table.h> 10 10 11 + 12 + #include <asm/xen/swiotlb-xen.h> 13 + #ifdef CONFIG_X86_64 14 + #include <asm/iommu.h> 15 + #include <asm/dma.h> 16 + #endif 17 + #include <linux/export.h> 18 + 11 19 int xen_swiotlb __read_mostly; 12 20 13 21 static struct dma_map_ops xen_swiotlb_dma_ops = { ··· 42 34 int __init pci_xen_swiotlb_detect(void) 43 35 { 44 36 37 + if (!xen_pv_domain()) 38 + return 0; 39 + 45 40 /* If running as PV guest, either iommu=soft, or swiotlb=force will 46 41 * activate this IOMMU. If running as PV privileged, activate it 47 42 * irregardless. 48 43 */ 49 - if ((xen_initial_domain() || swiotlb || swiotlb_force) && 50 - (xen_pv_domain())) 44 + if ((xen_initial_domain() || swiotlb || swiotlb_force)) 51 45 xen_swiotlb = 1; 52 46 53 47 /* If we are running under Xen, we MUST disable the native SWIOTLB. 54 48 * Don't worry about swiotlb_force flag activating the native, as 55 49 * the 'swiotlb' flag is the only one turning it on. */ 56 - if (xen_pv_domain()) 57 - swiotlb = 0; 50 + swiotlb = 0; 58 51 52 + #ifdef CONFIG_X86_64 53 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 54 + * (so no iommu=X command line over-writes). 55 + * Considering that PV guests do not want the *native SWIOTLB* but 56 + * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. 
57 + */ 58 + if (max_pfn > MAX_DMA32_PFN) 59 + no_iommu = 1; 60 + #endif 59 61 return xen_swiotlb; 60 62 } 61 63 62 64 void __init pci_xen_swiotlb_init(void) 63 65 { 64 66 if (xen_swiotlb) { 65 - xen_swiotlb_init(1); 67 + xen_swiotlb_init(1, true /* early */); 66 68 dma_ops = &xen_swiotlb_dma_ops; 67 69 68 70 /* Make sure ACS will be enabled */ 69 71 pci_request_acs(); 70 72 } 71 73 } 74 + 75 + int pci_xen_swiotlb_init_late(void) 76 + { 77 + int rc; 78 + 79 + if (xen_swiotlb) 80 + return 0; 81 + 82 + rc = xen_swiotlb_init(1, false /* late */); 83 + if (rc) 84 + return rc; 85 + 86 + dma_ops = &xen_swiotlb_dma_ops; 87 + /* Make sure ACS will be enabled */ 88 + pci_request_acs(); 89 + 90 + return 0; 91 + } 92 + EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); 93 + 72 94 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, 73 - 0, 95 + NULL, 74 96 pci_xen_swiotlb_init, 75 - 0); 97 + NULL);
+1
arch/x86/xen/platform-pci-unplug.c
··· 24 24 #include <linux/module.h> 25 25 26 26 #include <xen/platform_pci.h> 27 + #include "xen-ops.h" 27 28 28 29 #define XEN_PLATFORM_ERR_MAGIC -1 29 30 #define XEN_PLATFORM_ERR_PROTOCOL -2
+18
arch/x86/xen/setup.c
··· 432 432 * - mfn_list 433 433 * - xen_start_info 434 434 * See comment above "struct start_info" in <xen/interface/xen.h> 435 + * We tried to make the the memblock_reserve more selective so 436 + * that it would be clear what region is reserved. Sadly we ran 437 + * in the problem wherein on a 64-bit hypervisor with a 32-bit 438 + * initial domain, the pt_base has the cr3 value which is not 439 + * neccessarily where the pagetable starts! As Jan put it: " 440 + * Actually, the adjustment turns out to be correct: The page 441 + * tables for a 32-on-64 dom0 get allocated in the order "first L1", 442 + * "first L2", "first L3", so the offset to the page table base is 443 + * indeed 2. When reading xen/include/public/xen.h's comment 444 + * very strictly, this is not a violation (since there nothing is said 445 + * that the first thing in the page table space is pointed to by 446 + * pt_base; I admit that this seems to be implied though, namely 447 + * do I think that it is implied that the page table space is the 448 + * range [pt_base, pt_base + nt_pt_frames), whereas that 449 + * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), 450 + * which - without a priori knowledge - the kernel would have 451 + * difficulty to figure out)." - so lets just fall back to the 452 + * easy way and reserve the whole region. 435 453 */ 436 454 memblock_reserve(__pa(xen_start_info->mfn_list), 437 455 xen_start_info->pt_base - xen_start_info->mfn_list);
+7
arch/x86/xen/vga.c
··· 35 35 info->u.text_mode_3.font_height; 36 36 break; 37 37 38 + case XEN_VGATYPE_EFI_LFB: 38 39 case XEN_VGATYPE_VESA_LFB: 39 40 if (size < offsetof(struct dom0_vga_console_info, 40 41 u.vesa_lfb.gbl_caps)) ··· 55 54 screen_info->blue_pos = info->u.vesa_lfb.blue_pos; 56 55 screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; 57 56 screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; 57 + 58 + if (info->video_type == XEN_VGATYPE_EFI_LFB) { 59 + screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; 60 + break; 61 + } 62 + 58 63 if (size >= offsetof(struct dom0_vga_console_info, 59 64 u.vesa_lfb.gbl_caps) 60 65 + sizeof(info->u.vesa_lfb.gbl_caps))
+54 -2
arch/x86/xen/xen-head.S
··· 28 28 __FINIT 29 29 30 30 .pushsection .text 31 - .align PAGE_SIZE 31 + .balign PAGE_SIZE 32 32 ENTRY(hypercall_page) 33 - .skip PAGE_SIZE 33 + #define NEXT_HYPERCALL(x) \ 34 + ENTRY(xen_hypercall_##x) \ 35 + .skip 32 36 + 37 + NEXT_HYPERCALL(set_trap_table) 38 + NEXT_HYPERCALL(mmu_update) 39 + NEXT_HYPERCALL(set_gdt) 40 + NEXT_HYPERCALL(stack_switch) 41 + NEXT_HYPERCALL(set_callbacks) 42 + NEXT_HYPERCALL(fpu_taskswitch) 43 + NEXT_HYPERCALL(sched_op_compat) 44 + NEXT_HYPERCALL(platform_op) 45 + NEXT_HYPERCALL(set_debugreg) 46 + NEXT_HYPERCALL(get_debugreg) 47 + NEXT_HYPERCALL(update_descriptor) 48 + NEXT_HYPERCALL(ni) 49 + NEXT_HYPERCALL(memory_op) 50 + NEXT_HYPERCALL(multicall) 51 + NEXT_HYPERCALL(update_va_mapping) 52 + NEXT_HYPERCALL(set_timer_op) 53 + NEXT_HYPERCALL(event_channel_op_compat) 54 + NEXT_HYPERCALL(xen_version) 55 + NEXT_HYPERCALL(console_io) 56 + NEXT_HYPERCALL(physdev_op_compat) 57 + NEXT_HYPERCALL(grant_table_op) 58 + NEXT_HYPERCALL(vm_assist) 59 + NEXT_HYPERCALL(update_va_mapping_otherdomain) 60 + NEXT_HYPERCALL(iret) 61 + NEXT_HYPERCALL(vcpu_op) 62 + NEXT_HYPERCALL(set_segment_base) 63 + NEXT_HYPERCALL(mmuext_op) 64 + NEXT_HYPERCALL(xsm_op) 65 + NEXT_HYPERCALL(nmi_op) 66 + NEXT_HYPERCALL(sched_op) 67 + NEXT_HYPERCALL(callback_op) 68 + NEXT_HYPERCALL(xenoprof_op) 69 + NEXT_HYPERCALL(event_channel_op) 70 + NEXT_HYPERCALL(physdev_op) 71 + NEXT_HYPERCALL(hvm_op) 72 + NEXT_HYPERCALL(sysctl) 73 + NEXT_HYPERCALL(domctl) 74 + NEXT_HYPERCALL(kexec_op) 75 + NEXT_HYPERCALL(tmem_op) /* 38 */ 76 + ENTRY(xen_hypercall_rsvr) 77 + .skip 320 78 + NEXT_HYPERCALL(mca) /* 48 */ 79 + NEXT_HYPERCALL(arch_1) 80 + NEXT_HYPERCALL(arch_2) 81 + NEXT_HYPERCALL(arch_3) 82 + NEXT_HYPERCALL(arch_4) 83 + NEXT_HYPERCALL(arch_5) 84 + NEXT_HYPERCALL(arch_6) 85 + .balign PAGE_SIZE 34 86 .popsection 35 87 36 88 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
+2 -1
arch/x86/xen/xen-ops.h
··· 27 27 void xen_setup_shared_info(void); 28 28 void xen_build_mfn_list_list(void); 29 29 void xen_setup_machphys_mapping(void); 30 - pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); 30 + void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); 31 31 void xen_reserve_top(void); 32 32 extern unsigned long xen_max_p2m_pfn; 33 33 ··· 45 45 void xen_unplug_emulated_devices(void); 46 46 47 47 void __init xen_build_dynamic_phys_to_machine(void); 48 + unsigned long __init xen_revector_p2m_tree(void); 48 49 49 50 void xen_init_irq_ops(void); 50 51 void xen_setup_timer(int cpu);
+3 -8
drivers/net/xen-netback/netback.c
··· 635 635 return; 636 636 637 637 BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); 638 - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, 639 - npo.copy_prod); 640 - BUG_ON(ret != 0); 638 + gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); 641 639 642 640 while ((skb = __skb_dequeue(&rxq)) != NULL) { 643 641 sco = (struct skb_cb_overlay *)skb->cb; ··· 1458 1460 static void xen_netbk_tx_action(struct xen_netbk *netbk) 1459 1461 { 1460 1462 unsigned nr_gops; 1461 - int ret; 1462 1463 1463 1464 nr_gops = xen_netbk_tx_build_gops(netbk); 1464 1465 1465 1466 if (nr_gops == 0) 1466 1467 return; 1467 - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, 1468 - netbk->tx_copy_ops, nr_gops); 1469 - BUG_ON(ret); 1468 + 1469 + gnttab_batch_copy(netbk->tx_copy_ops, nr_gops); 1470 1470 1471 1471 xen_netbk_tx_submit(netbk); 1472 - 1473 1472 } 1474 1473 1475 1474 static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+10 -5
drivers/pci/xen-pcifront.c
··· 21 21 #include <linux/bitops.h> 22 22 #include <linux/time.h> 23 23 24 + #include <asm/xen/swiotlb-xen.h> 24 25 #define INVALID_GRANT_REF (0) 25 26 #define INVALID_EVTCHN (-1) 26 27 ··· 237 236 return errno_to_pcibios_err(do_pci_op(pdev, &op)); 238 237 } 239 238 240 - struct pci_ops pcifront_bus_ops = { 239 + static struct pci_ops pcifront_bus_ops = { 241 240 .read = pcifront_bus_read, 242 241 .write = pcifront_bus_write, 243 242 }; ··· 669 668 schedule_pcifront_aer_op(pdev); 670 669 return IRQ_HANDLED; 671 670 } 672 - static int pcifront_connect(struct pcifront_device *pdev) 671 + static int pcifront_connect_and_init_dma(struct pcifront_device *pdev) 673 672 { 674 673 int err = 0; 675 674 ··· 682 681 dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); 683 682 err = -EEXIST; 684 683 } 685 - 686 684 spin_unlock(&pcifront_dev_lock); 687 685 686 + if (!err && !swiotlb_nr_tbl()) { 687 + err = pci_xen_swiotlb_init_late(); 688 + if (err) 689 + dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n"); 690 + } 688 691 return err; 689 692 } 690 693 ··· 847 842 XenbusStateInitialised) 848 843 goto out; 849 844 850 - err = pcifront_connect(pdev); 845 + err = pcifront_connect_and_init_dma(pdev); 851 846 if (err) { 852 847 xenbus_dev_fatal(pdev->xdev, err, 853 - "Error connecting PCI Frontend"); 848 + "Error setting up PCI Frontend"); 854 849 goto out; 855 850 } 856 851
+2
drivers/tty/hvc/hvc_xen.c
··· 21 21 #include <linux/console.h> 22 22 #include <linux/delay.h> 23 23 #include <linux/err.h> 24 + #include <linux/irq.h> 24 25 #include <linux/init.h> 25 26 #include <linux/types.h> 26 27 #include <linux/list.h> ··· 36 35 #include <xen/page.h> 37 36 #include <xen/events.h> 38 37 #include <xen/interface/io/console.h> 38 + #include <xen/interface/sched.h> 39 39 #include <xen/hvc-console.h> 40 40 #include <xen/xenbus.h> 41 41
+15 -3
drivers/xen/events.c
··· 373 373 { 374 374 struct shared_info *s = HYPERVISOR_shared_info; 375 375 unsigned int cpu = get_cpu(); 376 + int do_hypercall = 0, evtchn_pending = 0; 376 377 377 378 BUG_ON(!irqs_disabled()); 378 379 379 - /* Slow path (hypercall) if this is a non-local port. */ 380 - if (unlikely(cpu != cpu_from_evtchn(port))) { 380 + if (unlikely((cpu != cpu_from_evtchn(port)))) 381 + do_hypercall = 1; 382 + else 383 + evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); 384 + 385 + if (unlikely(evtchn_pending && xen_hvm_domain())) 386 + do_hypercall = 1; 387 + 388 + /* Slow path (hypercall) if this is a non-local port or if this is 389 + * an hvm domain and an event is pending (hvm domains don't have 390 + * their own implementation of irq_enable). */ 391 + if (do_hypercall) { 381 392 struct evtchn_unmask unmask = { .port = port }; 382 393 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); 383 394 } else { ··· 401 390 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose 402 391 * the interrupt edge' if the channel is masked. 403 392 */ 404 - if (sync_test_bit(port, &s->evtchn_pending[0]) && 393 + if (evtchn_pending && 405 394 !sync_test_and_set_bit(port / BITS_PER_LONG, 406 395 &vcpu_info->evtchn_pending_sel)) 407 396 vcpu_info->evtchn_upcall_pending = 1; ··· 842 831 struct irq_info *info = info_for_irq(irq); 843 832 WARN_ON(info == NULL || info->type != IRQT_EVTCHN); 844 833 } 834 + irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); 845 835 846 836 out: 847 837 mutex_unlock(&irq_mapping_update_lock);
+1 -1
drivers/xen/gntdev.c
··· 446 446 spin_unlock(&priv->lock); 447 447 } 448 448 449 - struct mmu_notifier_ops gntdev_mmu_ops = { 449 + static struct mmu_notifier_ops gntdev_mmu_ops = { 450 450 .release = mn_release, 451 451 .invalidate_page = mn_invl_page, 452 452 .invalidate_range_start = mn_invl_range_start,
+60 -7
drivers/xen/grant-table.c
··· 38 38 #include <linux/vmalloc.h> 39 39 #include <linux/uaccess.h> 40 40 #include <linux/io.h> 41 + #include <linux/delay.h> 41 42 #include <linux/hardirq.h> 42 43 43 44 #include <xen/xen.h> ··· 48 47 #include <xen/interface/memory.h> 49 48 #include <xen/hvc-console.h> 50 49 #include <asm/xen/hypercall.h> 50 + #include <asm/xen/interface.h> 51 51 52 52 #include <asm/pgtable.h> 53 53 #include <asm/sync_bitops.h> ··· 287 285 } 288 286 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); 289 287 290 - void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, 291 - unsigned long frame, int flags, 292 - unsigned page_off, 293 - unsigned length) 288 + static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, 289 + unsigned long frame, int flags, 290 + unsigned page_off, unsigned length) 294 291 { 295 292 gnttab_shared.v2[ref].sub_page.frame = frame; 296 293 gnttab_shared.v2[ref].sub_page.page_off = page_off; ··· 346 345 } 347 346 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); 348 347 349 - void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, 350 - int flags, domid_t trans_domid, 351 - grant_ref_t trans_gref) 348 + static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, 349 + int flags, domid_t trans_domid, 350 + grant_ref_t trans_gref) 352 351 { 353 352 gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; 354 353 gnttab_shared.v2[ref].transitive.gref = trans_gref; ··· 824 823 } 825 824 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); 826 825 826 + /* Handling of paged out grant targets (GNTST_eagain) */ 827 + #define MAX_DELAY 256 828 + static inline void 829 + gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status, 830 + const char *func) 831 + { 832 + unsigned delay = 1; 833 + 834 + do { 835 + BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1)); 836 + if (*status == GNTST_eagain) 837 + msleep(delay++); 838 + } while ((*status == GNTST_eagain) && (delay < MAX_DELAY)); 839 + 840 + if (delay >= 
MAX_DELAY) { 841 + printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm); 842 + *status = GNTST_bad_page; 843 + } 844 + } 845 + 846 + void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count) 847 + { 848 + struct gnttab_map_grant_ref *op; 849 + 850 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count)) 851 + BUG(); 852 + for (op = batch; op < batch + count; op++) 853 + if (op->status == GNTST_eagain) 854 + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op, 855 + &op->status, __func__); 856 + } 857 + EXPORT_SYMBOL_GPL(gnttab_batch_map); 858 + 859 + void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) 860 + { 861 + struct gnttab_copy *op; 862 + 863 + if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count)) 864 + BUG(); 865 + for (op = batch; op < batch + count; op++) 866 + if (op->status == GNTST_eagain) 867 + gnttab_retry_eagain_gop(GNTTABOP_copy, op, 868 + &op->status, __func__); 869 + } 870 + EXPORT_SYMBOL_GPL(gnttab_batch_copy); 871 + 827 872 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, 828 873 struct gnttab_map_grant_ref *kmap_ops, 829 874 struct page **pages, unsigned int count) ··· 882 835 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); 883 836 if (ret) 884 837 return ret; 838 + 839 + /* Retry eagain maps */ 840 + for (i = 0; i < count; i++) 841 + if (map_ops[i].status == GNTST_eagain) 842 + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, 843 + &map_ops[i].status, __func__); 885 844 886 845 if (xen_feature(XENFEAT_auto_translated_physmap)) 887 846 return ret;
+100 -31
drivers/xen/privcmd.c
··· 76 76 */ 77 77 static int gather_array(struct list_head *pagelist, 78 78 unsigned nelem, size_t size, 79 - void __user *data) 79 + const void __user *data) 80 80 { 81 81 unsigned pageidx; 82 82 void *pagedata; ··· 246 246 domid_t domain; 247 247 unsigned long va; 248 248 struct vm_area_struct *vma; 249 - int err; 249 + /* A tristate: 250 + * 0 for no errors 251 + * 1 if at least one error has happened (and no 252 + * -ENOENT errors have happened) 253 + * -ENOENT if at least 1 -ENOENT has happened. 254 + */ 255 + int global_error; 256 + /* An array for individual errors */ 257 + int *err; 250 258 251 - xen_pfn_t __user *user; 259 + /* User-space mfn array to store errors in the second pass for V1. */ 260 + xen_pfn_t __user *user_mfn; 252 261 }; 253 262 254 263 static int mmap_batch_fn(void *data, void *state) 255 264 { 256 265 xen_pfn_t *mfnp = data; 257 266 struct mmap_batch_state *st = state; 267 + int ret; 258 268 259 - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, 260 - st->vma->vm_page_prot, st->domain) < 0) { 261 - *mfnp |= 0xf0000000U; 262 - st->err++; 269 + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, 270 + st->vma->vm_page_prot, st->domain); 271 + 272 + /* Store error code for second pass. */ 273 + *(st->err++) = ret; 274 + 275 + /* And see if it affects the global_error. */ 276 + if (ret < 0) { 277 + if (ret == -ENOENT) 278 + st->global_error = -ENOENT; 279 + else { 280 + /* Record that at least one error has happened. 
*/ 281 + if (st->global_error == 0) 282 + st->global_error = 1; 283 + } 263 284 } 264 285 st->va += PAGE_SIZE; 265 286 266 287 return 0; 267 288 } 268 289 269 - static int mmap_return_errors(void *data, void *state) 290 + static int mmap_return_errors_v1(void *data, void *state) 270 291 { 271 292 xen_pfn_t *mfnp = data; 272 293 struct mmap_batch_state *st = state; 294 + int err = *(st->err++); 273 295 274 - return put_user(*mfnp, st->user++); 296 + /* 297 + * V1 encodes the error codes in the 32bit top nibble of the 298 + * mfn (with its known limitations vis-a-vis 64 bit callers). 299 + */ 300 + *mfnp |= (err == -ENOENT) ? 301 + PRIVCMD_MMAPBATCH_PAGED_ERROR : 302 + PRIVCMD_MMAPBATCH_MFN_ERROR; 303 + return __put_user(*mfnp, st->user_mfn++); 275 304 } 276 305 277 306 static struct vm_operations_struct privcmd_vm_ops; 278 307 279 - static long privcmd_ioctl_mmap_batch(void __user *udata) 308 + static long privcmd_ioctl_mmap_batch(void __user *udata, int version) 280 309 { 281 310 int ret; 282 - struct privcmd_mmapbatch m; 311 + struct privcmd_mmapbatch_v2 m; 283 312 struct mm_struct *mm = current->mm; 284 313 struct vm_area_struct *vma; 285 314 unsigned long nr_pages; 286 315 LIST_HEAD(pagelist); 316 + int *err_array = NULL; 287 317 struct mmap_batch_state state; 288 318 289 319 if (!xen_initial_domain()) 290 320 return -EPERM; 291 321 292 - if (copy_from_user(&m, udata, sizeof(m))) 293 - return -EFAULT; 322 + switch (version) { 323 + case 1: 324 + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) 325 + return -EFAULT; 326 + /* Returns per-frame error in m.arr. */ 327 + m.err = NULL; 328 + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) 329 + return -EFAULT; 330 + break; 331 + case 2: 332 + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) 333 + return -EFAULT; 334 + /* Returns per-frame error code in m.err. 
*/ 335 + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) 336 + return -EFAULT; 337 + break; 338 + default: 339 + return -EINVAL; 340 + } 294 341 295 342 nr_pages = m.num; 296 343 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) 297 344 return -EINVAL; 298 345 299 - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), 300 - m.arr); 346 + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); 301 347 302 - if (ret || list_empty(&pagelist)) 348 + if (ret) 303 349 goto out; 350 + if (list_empty(&pagelist)) { 351 + ret = -EINVAL; 352 + goto out; 353 + } 354 + 355 + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); 356 + if (err_array == NULL) { 357 + ret = -ENOMEM; 358 + goto out; 359 + } 304 360 305 361 down_write(&mm->mmap_sem); 306 362 ··· 371 315 goto out; 372 316 } 373 317 374 - state.domain = m.dom; 375 - state.vma = vma; 376 - state.va = m.addr; 377 - state.err = 0; 318 + state.domain = m.dom; 319 + state.vma = vma; 320 + state.va = m.addr; 321 + state.global_error = 0; 322 + state.err = err_array; 378 323 379 - ret = traverse_pages(m.num, sizeof(xen_pfn_t), 380 - &pagelist, mmap_batch_fn, &state); 324 + /* mmap_batch_fn guarantees ret == 0 */ 325 + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), 326 + &pagelist, mmap_batch_fn, &state)); 381 327 382 328 up_write(&mm->mmap_sem); 383 329 384 - if (state.err > 0) { 385 - state.user = m.arr; 330 + if (state.global_error && (version == 1)) { 331 + /* Write back errors in second pass. */ 332 + state.user_mfn = (xen_pfn_t *)m.arr; 333 + state.err = err_array; 386 334 ret = traverse_pages(m.num, sizeof(xen_pfn_t), 387 - &pagelist, 388 - mmap_return_errors, &state); 335 + &pagelist, mmap_return_errors_v1, &state); 336 + } else if (version == 2) { 337 + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); 338 + if (ret) 339 + ret = -EFAULT; 389 340 } 390 341 342 + /* If we have not had any EFAULT-like global errors then set the global 343 + * error to -ENOENT if necessary. 
*/ 344 + if ((ret == 0) && (state.global_error == -ENOENT)) 345 + ret = -ENOENT; 346 + 391 347 out: 348 + kfree(err_array); 392 349 free_page_list(&pagelist); 393 350 394 351 return ret; ··· 423 354 break; 424 355 425 356 case IOCTL_PRIVCMD_MMAPBATCH: 426 - ret = privcmd_ioctl_mmap_batch(udata); 357 + ret = privcmd_ioctl_mmap_batch(udata, 1); 358 + break; 359 + 360 + case IOCTL_PRIVCMD_MMAPBATCH_V2: 361 + ret = privcmd_ioctl_mmap_batch(udata, 2); 427 362 break; 428 363 429 364 default: ··· 453 380 454 381 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) 455 382 { 456 - /* Unsupported for auto-translate guests. */ 457 - if (xen_feature(XENFEAT_auto_translated_physmap)) 458 - return -ENOSYS; 459 - 460 383 /* DONTCOPY is essential for Xen because copy_page_range doesn't know 461 384 * how to recreate these mappings */ 462 385 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+78 -45
drivers/xen/swiotlb-xen.c
··· 52 52 * Quick lookup value of the bus address of the IOTLB. 53 53 */ 54 54 55 - u64 start_dma_addr; 55 + static u64 start_dma_addr; 56 56 57 57 static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) 58 58 { ··· 144 144 } while (i < nslabs); 145 145 return 0; 146 146 } 147 - 148 - void __init xen_swiotlb_init(int verbose) 147 + static unsigned long xen_set_nslabs(unsigned long nr_tbl) 149 148 { 150 - unsigned long bytes; 151 - int rc = -ENOMEM; 152 - unsigned long nr_tbl; 153 - char *m = NULL; 154 - unsigned int repeat = 3; 155 - 156 - nr_tbl = swiotlb_nr_tbl(); 157 - if (nr_tbl) 158 - xen_io_tlb_nslabs = nr_tbl; 159 - else { 149 + if (!nr_tbl) { 160 150 xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); 161 151 xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); 162 - } 163 - retry: 164 - bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; 152 + } else 153 + xen_io_tlb_nslabs = nr_tbl; 165 154 155 + return xen_io_tlb_nslabs << IO_TLB_SHIFT; 156 + } 157 + 158 + enum xen_swiotlb_err { 159 + XEN_SWIOTLB_UNKNOWN = 0, 160 + XEN_SWIOTLB_ENOMEM, 161 + XEN_SWIOTLB_EFIXUP 162 + }; 163 + 164 + static const char *xen_swiotlb_error(enum xen_swiotlb_err err) 165 + { 166 + switch (err) { 167 + case XEN_SWIOTLB_ENOMEM: 168 + return "Cannot allocate Xen-SWIOTLB buffer\n"; 169 + case XEN_SWIOTLB_EFIXUP: 170 + return "Failed to get contiguous memory for DMA from Xen!\n"\ 171 + "You either: don't have the permissions, do not have"\ 172 + " enough free memory under 4GB, or the hypervisor memory"\ 173 + " is too fragmented!"; 174 + default: 175 + break; 176 + } 177 + return ""; 178 + } 179 + int __ref xen_swiotlb_init(int verbose, bool early) 180 + { 181 + unsigned long bytes, order; 182 + int rc = -ENOMEM; 183 + enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; 184 + unsigned int repeat = 3; 185 + 186 + xen_io_tlb_nslabs = swiotlb_nr_tbl(); 187 + retry: 188 + bytes = xen_set_nslabs(xen_io_tlb_nslabs); 189 + order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT); 166 190 /* 167 
191 * Get IO TLB memory from any location. 168 192 */ 169 - xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); 193 + if (early) 194 + xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); 195 + else { 196 + #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) 197 + #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 198 + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { 199 + xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); 200 + if (xen_io_tlb_start) 201 + break; 202 + order--; 203 + } 204 + if (order != get_order(bytes)) { 205 + pr_warn("Warning: only able to allocate %ld MB " 206 + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); 207 + xen_io_tlb_nslabs = SLABS_PER_PAGE << order; 208 + bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; 209 + } 210 + } 170 211 if (!xen_io_tlb_start) { 171 - m = "Cannot allocate Xen-SWIOTLB buffer!\n"; 212 + m_ret = XEN_SWIOTLB_ENOMEM; 172 213 goto error; 173 214 } 174 215 xen_io_tlb_end = xen_io_tlb_start + bytes; ··· 220 179 bytes, 221 180 xen_io_tlb_nslabs); 222 181 if (rc) { 223 - free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); 224 - m = "Failed to get contiguous memory for DMA from Xen!\n"\ 225 - "You either: don't have the permissions, do not have"\ 226 - " enough free memory under 4GB, or the hypervisor memory"\ 227 - "is too fragmented!"; 182 + if (early) 183 + free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); 184 + else { 185 + free_pages((unsigned long)xen_io_tlb_start, order); 186 + xen_io_tlb_start = NULL; 187 + } 188 + m_ret = XEN_SWIOTLB_EFIXUP; 228 189 goto error; 229 190 } 230 191 start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); 231 - swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); 232 - 233 - return; 192 + if (early) { 193 + swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); 194 + rc = 0; 195 + } else 196 + rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); 197 + return rc; 234 198 error: 235 199 if 
(repeat--) { 236 200 xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ ··· 244 198 (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); 245 199 goto retry; 246 200 } 247 - xen_raw_printk("%s (rc:%d)", m, rc); 248 - panic("%s (rc:%d)", m, rc); 201 + pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); 202 + if (early) 203 + panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); 204 + else 205 + free_pages((unsigned long)xen_io_tlb_start, order); 206 + return rc; 249 207 } 250 - 251 208 void * 252 209 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 253 210 dma_addr_t *dma_handle, gfp_t flags, ··· 515 466 } 516 467 EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); 517 468 518 - int 519 - xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 520 - enum dma_data_direction dir) 521 - { 522 - return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); 523 - } 524 - EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg); 525 - 526 469 /* 527 470 * Unmap a set of streaming mode DMA translations. Again, cpu read rules 528 471 * concerning calls here are the same as for swiotlb_unmap_page() above. ··· 534 493 535 494 } 536 495 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); 537 - 538 - void 539 - xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 540 - enum dma_data_direction dir) 541 - { 542 - return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); 543 - } 544 - EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg); 545 496 546 497 /* 547 498 * Make physical memory consistent for a set of streaming mode DMA translations
+12 -1
drivers/xen/sys-hypervisor.c
··· 114 114 115 115 /* UUID */ 116 116 117 - static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) 117 + static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer) 118 118 { 119 119 char *vm, *val; 120 120 int ret; ··· 132 132 return PTR_ERR(val); 133 133 ret = sprintf(buffer, "%s\n", val); 134 134 kfree(val); 135 + return ret; 136 + } 137 + 138 + static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) 139 + { 140 + xen_domain_handle_t uuid; 141 + int ret; 142 + ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid); 143 + if (ret) 144 + return uuid_show_fallback(attr, buffer); 145 + ret = sprintf(buffer, "%pU\n", uuid); 135 146 return ret; 136 147 } 137 148
+1
drivers/xen/tmem.c
··· 21 21 #include <asm/xen/hypercall.h> 22 22 #include <asm/xen/page.h> 23 23 #include <asm/xen/hypervisor.h> 24 + #include <xen/tmem.h> 24 25 25 26 #define TMEM_CONTROL 0 26 27 #define TMEM_NEW_POOL 1
+106 -30
drivers/xen/xen-pciback/pci_stub.c
··· 362 362 else { 363 363 dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); 364 364 __pci_reset_function_locked(dev); 365 + pci_restore_state(dev); 365 366 } 366 367 /* Now disable the device (this also ensures some private device 367 368 * data is setup before we export) ··· 682 681 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 683 682 " by HVM, kill it\n"); 684 683 kill_domain_by_device(psdev); 685 - goto release; 684 + goto end; 686 685 } 687 686 688 687 if (!test_bit(_XEN_PCIB_AERHANDLER, 689 688 (unsigned long *)&psdev->pdev->sh_info->flags)) { 690 689 dev_err(&dev->dev, 691 690 "guest with no AER driver should have been killed\n"); 692 - goto release; 691 + goto end; 693 692 } 694 693 result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); 695 694 ··· 699 698 "No AER slot_reset service or disconnected!\n"); 700 699 kill_domain_by_device(psdev); 701 700 } 702 - release: 703 - pcistub_device_put(psdev); 704 701 end: 702 + if (psdev) 703 + pcistub_device_put(psdev); 705 704 up_write(&pcistub_sem); 706 705 return result; 707 706 ··· 740 739 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 741 740 " by HVM, kill it\n"); 742 741 kill_domain_by_device(psdev); 743 - goto release; 742 + goto end; 744 743 } 745 744 746 745 if (!test_bit(_XEN_PCIB_AERHANDLER, 747 746 (unsigned long *)&psdev->pdev->sh_info->flags)) { 748 747 dev_err(&dev->dev, 749 748 "guest with no AER driver should have been killed\n"); 750 - goto release; 749 + goto end; 751 750 } 752 751 result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); 753 752 ··· 757 756 "No AER mmio_enabled service or disconnected!\n"); 758 757 kill_domain_by_device(psdev); 759 758 } 760 - release: 761 - pcistub_device_put(psdev); 762 759 end: 760 + if (psdev) 761 + pcistub_device_put(psdev); 763 762 up_write(&pcistub_sem); 764 763 return result; 765 764 } ··· 798 797 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 799 798 " by HVM, kill it\n"); 
800 799 kill_domain_by_device(psdev); 801 - goto release; 800 + goto end; 802 801 } 803 802 804 803 /*Guest owns the device yet no aer handler regiested, kill guest*/ ··· 806 805 (unsigned long *)&psdev->pdev->sh_info->flags)) { 807 806 dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); 808 807 kill_domain_by_device(psdev); 809 - goto release; 808 + goto end; 810 809 } 811 810 result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); 812 811 ··· 816 815 "No AER error_detected service or disconnected!\n"); 817 816 kill_domain_by_device(psdev); 818 817 } 819 - release: 820 - pcistub_device_put(psdev); 821 818 end: 819 + if (psdev) 820 + pcistub_device_put(psdev); 822 821 up_write(&pcistub_sem); 823 822 return result; 824 823 } ··· 852 851 dev_err(&dev->dev, DRV_NAME " device is not connected or owned" 853 852 " by HVM, kill it\n"); 854 853 kill_domain_by_device(psdev); 855 - goto release; 854 + goto end; 856 855 } 857 856 858 857 if (!test_bit(_XEN_PCIB_AERHANDLER, ··· 860 859 dev_err(&dev->dev, 861 860 "guest with no AER driver should have been killed\n"); 862 861 kill_domain_by_device(psdev); 863 - goto release; 862 + goto end; 864 863 } 865 864 common_process(psdev, 1, XEN_PCI_OP_aer_resume, 866 865 PCI_ERS_RESULT_RECOVERED); 867 - release: 868 - pcistub_device_put(psdev); 869 866 end: 867 + if (psdev) 868 + pcistub_device_put(psdev); 870 869 up_write(&pcistub_sem); 871 870 return; 872 871 } ··· 898 897 int *slot, int *func) 899 898 { 900 899 int err; 900 + char wc = '*'; 901 901 902 902 err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); 903 - if (err == 4) 903 + switch (err) { 904 + case 3: 905 + *func = -1; 906 + err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc); 907 + break; 908 + case 2: 909 + *slot = *func = -1; 910 + err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc); 911 + if (err >= 2) 912 + ++err; 913 + break; 914 + } 915 + if (err == 4 && wc == '*') 904 916 return 0; 905 917 else if (err < 0) 906 918 return 
-EINVAL; 907 919 908 920 /* try again without domain */ 909 921 *domain = 0; 922 + wc = '*'; 910 923 err = sscanf(buf, " %x:%x.%x", bus, slot, func); 911 - if (err == 3) 924 + switch (err) { 925 + case 2: 926 + *func = -1; 927 + err = sscanf(buf, " %x:%x.%c", bus, slot, &wc); 928 + break; 929 + case 1: 930 + *slot = *func = -1; 931 + err = sscanf(buf, " %x:*.%c", bus, &wc) + 1; 932 + break; 933 + } 934 + if (err == 3 && wc == '*') 912 935 return 0; 913 936 914 937 return -EINVAL; ··· 955 930 { 956 931 struct pcistub_device_id *pci_dev_id; 957 932 unsigned long flags; 933 + int rc = 0; 934 + 935 + if (slot < 0) { 936 + for (slot = 0; !rc && slot < 32; ++slot) 937 + rc = pcistub_device_id_add(domain, bus, slot, func); 938 + return rc; 939 + } 940 + 941 + if (func < 0) { 942 + for (func = 0; !rc && func < 8; ++func) 943 + rc = pcistub_device_id_add(domain, bus, slot, func); 944 + return rc; 945 + } 958 946 959 947 pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); 960 948 if (!pci_dev_id) ··· 990 952 static int pcistub_device_id_remove(int domain, int bus, int slot, int func) 991 953 { 992 954 struct pcistub_device_id *pci_dev_id, *t; 993 - int devfn = PCI_DEVFN(slot, func); 994 955 int err = -ENOENT; 995 956 unsigned long flags; 996 957 997 958 spin_lock_irqsave(&device_ids_lock, flags); 998 959 list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, 999 960 slot_list) { 1000 - if (pci_dev_id->domain == domain 1001 - && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { 961 + if (pci_dev_id->domain == domain && pci_dev_id->bus == bus 962 + && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot) 963 + && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) { 1002 964 /* Don't break; here because it's possible the same 1003 965 * slot could be in the list more than once 1004 966 */ ··· 1025 987 struct config_field *field; 1026 988 1027 989 psdev = pcistub_device_find(domain, bus, slot, func); 1028 - if (!psdev || !psdev->dev) { 990 + if (!psdev) { 1029 991 err 
= -ENODEV; 1030 992 goto out; 1031 993 } ··· 1049 1011 if (err) 1050 1012 kfree(field); 1051 1013 out: 1014 + if (psdev) 1015 + pcistub_device_put(psdev); 1052 1016 return err; 1053 1017 } 1054 1018 ··· 1155 1115 1156 1116 err = str_to_slot(buf, &domain, &bus, &slot, &func); 1157 1117 if (err) 1158 - goto out; 1118 + return err; 1159 1119 1160 1120 psdev = pcistub_device_find(domain, bus, slot, func); 1161 - 1162 1121 if (!psdev) 1163 1122 goto out; 1164 1123 ··· 1173 1134 if (dev_data->isr_on) 1174 1135 dev_data->ack_intr = 1; 1175 1136 out: 1137 + if (psdev) 1138 + pcistub_device_put(psdev); 1176 1139 if (!err) 1177 1140 err = count; 1178 1141 return err; ··· 1257 1216 err = str_to_slot(buf, &domain, &bus, &slot, &func); 1258 1217 if (err) 1259 1218 goto out; 1219 + if (slot < 0 || func < 0) { 1220 + err = -EINVAL; 1221 + goto out; 1222 + } 1260 1223 psdev = pcistub_device_find(domain, bus, slot, func); 1261 1224 if (!psdev) { 1262 1225 err = -ENODEV; 1263 1226 goto out; 1264 1227 } 1265 - if (!psdev->dev) { 1266 - err = -ENODEV; 1267 - goto release; 1268 - } 1228 + 1269 1229 dev_data = pci_get_drvdata(psdev->dev); 1270 1230 /* the driver data for a device should never be null at this point */ 1271 1231 if (!dev_data) { ··· 1339 1297 1340 1298 if (pci_devs_to_hide && *pci_devs_to_hide) { 1341 1299 do { 1300 + char wc = '*'; 1301 + 1342 1302 parsed = 0; 1343 1303 1344 1304 err = sscanf(pci_devs_to_hide + pos, 1345 1305 " (%x:%x:%x.%x) %n", 1346 1306 &domain, &bus, &slot, &func, &parsed); 1347 - if (err != 4) { 1307 + switch (err) { 1308 + case 3: 1309 + func = -1; 1310 + err = sscanf(pci_devs_to_hide + pos, 1311 + " (%x:%x:%x.%c) %n", 1312 + &domain, &bus, &slot, &wc, 1313 + &parsed); 1314 + break; 1315 + case 2: 1316 + slot = func = -1; 1317 + err = sscanf(pci_devs_to_hide + pos, 1318 + " (%x:%x:*.%c) %n", 1319 + &domain, &bus, &wc, &parsed) + 1; 1320 + break; 1321 + } 1322 + 1323 + if (err != 4 || wc != '*') { 1348 1324 domain = 0; 1325 + wc = '*'; 1349 1326 err 
= sscanf(pci_devs_to_hide + pos, 1350 1327 " (%x:%x.%x) %n", 1351 1328 &bus, &slot, &func, &parsed); 1352 - if (err != 3) 1329 + switch (err) { 1330 + case 2: 1331 + func = -1; 1332 + err = sscanf(pci_devs_to_hide + pos, 1333 + " (%x:%x.%c) %n", 1334 + &bus, &slot, &wc, 1335 + &parsed); 1336 + break; 1337 + case 1: 1338 + slot = func = -1; 1339 + err = sscanf(pci_devs_to_hide + pos, 1340 + " (%x:*.%c) %n", 1341 + &bus, &wc, &parsed) + 1; 1342 + break; 1343 + } 1344 + if (err != 3 || wc != '*') 1353 1345 goto parse_error; 1354 1346 } 1355 1347
+2 -4
drivers/xen/xenbus/xenbus_client.c
··· 490 490 491 491 op.host_addr = arbitrary_virt_to_machine(pte).maddr; 492 492 493 - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) 494 - BUG(); 493 + gnttab_batch_map(&op, 1); 495 494 496 495 if (op.status != GNTST_okay) { 497 496 free_vm_area(area); ··· 571 572 gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, 572 573 dev->otherend_id); 573 574 574 - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) 575 - BUG(); 575 + gnttab_batch_map(&op, 1); 576 576 577 577 if (op.status != GNTST_okay) { 578 578 xenbus_dev_fatal(dev, op.status,
+1 -1
drivers/xen/xenbus/xenbus_comms.c
··· 224 224 int err; 225 225 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 226 226 0, "xenbus", &xb_waitq); 227 - if (err <= 0) { 227 + if (err < 0) { 228 228 printk(KERN_ERR "XENBUS request irq failed %i\n", err); 229 229 return err; 230 230 }
+1 -1
drivers/xen/xenbus/xenbus_dev_backend.c
··· 107 107 return 0; 108 108 } 109 109 110 - const struct file_operations xenbus_backend_fops = { 110 + static const struct file_operations xenbus_backend_fops = { 111 111 .open = xenbus_backend_open, 112 112 .mmap = xenbus_backend_mmap, 113 113 .unlocked_ioctl = xenbus_backend_ioctl,
+40 -16
drivers/xen/xenbus/xenbus_probe.c
··· 324 324 return 0; 325 325 } 326 326 327 - struct xenbus_device *xenbus_device_find(const char *nodename, 328 - struct bus_type *bus) 327 + static struct xenbus_device *xenbus_device_find(const char *nodename, 328 + struct bus_type *bus) 329 329 { 330 330 struct xb_find_info info = { .dev = NULL, .nodename = nodename }; 331 331 ··· 719 719 return err; 720 720 } 721 721 722 + enum xenstore_init { 723 + UNKNOWN, 724 + PV, 725 + HVM, 726 + LOCAL, 727 + }; 722 728 static int __init xenbus_init(void) 723 729 { 724 730 int err = 0; 731 + enum xenstore_init usage = UNKNOWN; 732 + uint64_t v = 0; 725 733 726 734 if (!xen_domain()) 727 735 return -ENODEV; 728 736 729 737 xenbus_ring_ops_init(); 730 738 731 - if (xen_hvm_domain()) { 732 - uint64_t v = 0; 739 + if (xen_pv_domain()) 740 + usage = PV; 741 + if (xen_hvm_domain()) 742 + usage = HVM; 743 + if (xen_hvm_domain() && xen_initial_domain()) 744 + usage = LOCAL; 745 + if (xen_pv_domain() && !xen_start_info->store_evtchn) 746 + usage = LOCAL; 747 + if (xen_pv_domain() && xen_start_info->store_evtchn) 748 + xenstored_ready = 1; 749 + 750 + switch (usage) { 751 + case LOCAL: 752 + err = xenstored_local_init(); 753 + if (err) 754 + goto out_error; 755 + xen_store_interface = mfn_to_virt(xen_store_mfn); 756 + break; 757 + case PV: 758 + xen_store_evtchn = xen_start_info->store_evtchn; 759 + xen_store_mfn = xen_start_info->store_mfn; 760 + xen_store_interface = mfn_to_virt(xen_store_mfn); 761 + break; 762 + case HVM: 733 763 err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); 734 764 if (err) 735 765 goto out_error; ··· 768 738 if (err) 769 739 goto out_error; 770 740 xen_store_mfn = (unsigned long)v; 771 - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); 772 - } else { 773 - xen_store_evtchn = xen_start_info->store_evtchn; 774 - xen_store_mfn = xen_start_info->store_mfn; 775 - if (xen_store_evtchn) 776 - xenstored_ready = 1; 777 - else { 778 - err = xenstored_local_init(); 779 - if (err) 780 - goto 
out_error; 781 - } 782 - xen_store_interface = mfn_to_virt(xen_store_mfn); 741 + xen_store_interface = 742 + ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); 743 + break; 744 + default: 745 + pr_warn("Xenstore state unknown\n"); 746 + break; 783 747 } 784 748 785 749 /* Initialize the interface to xenstore. */
+1
drivers/xen/xenbus/xenbus_probe_frontend.c
··· 21 21 #include <xen/xenbus.h> 22 22 #include <xen/events.h> 23 23 #include <xen/page.h> 24 + #include <xen/xen.h> 24 25 25 26 #include <xen/platform_pci.h> 26 27
+2 -1
drivers/xen/xenbus/xenbus_xs.c
··· 44 44 #include <linux/rwsem.h> 45 45 #include <linux/module.h> 46 46 #include <linux/mutex.h> 47 + #include <asm/xen/hypervisor.h> 47 48 #include <xen/xenbus.h> 48 49 #include <xen/xen.h> 49 50 #include "xenbus_comms.h" ··· 623 622 { 624 623 int err, supported = 0; 625 624 626 - if (!xen_hvm_domain()) 625 + if (!xen_hvm_domain() || xen_initial_domain()) 627 626 return; 628 627 629 628 err = xenbus_scanf(XBT_NIL, "control",
+1
include/linux/swiotlb.h
··· 25 25 extern void swiotlb_init(int verbose); 26 26 extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); 27 27 extern unsigned long swiotlb_nr_tbl(void); 28 + extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); 28 29 29 30 /* 30 31 * Enumeration for sync targets
+12
include/xen/grant_table.h
··· 190 190 struct gnttab_map_grant_ref *kunmap_ops, 191 191 struct page **pages, unsigned int count); 192 192 193 + /* Perform a batch of grant map/copy operations. Retry every batch slot 194 + * for which the hypervisor returns GNTST_eagain. This is typically due 195 + * to paged out target frames. 196 + * 197 + * Will retry for 1, 2, ... 255 ms, i.e. 256 times during 32 seconds. 198 + * 199 + * Return value in each iand every status field of the batch guaranteed 200 + * to not be GNTST_eagain. 201 + */ 202 + void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count); 203 + void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count); 204 + 193 205 #endif /* __ASM_GNTTAB_H__ */
+8 -4
include/xen/interface/grant_table.h
··· 338 338 #define GNTTABOP_transfer 4 339 339 struct gnttab_transfer { 340 340 /* IN parameters. */ 341 - unsigned long mfn; 341 + xen_pfn_t mfn; 342 342 domid_t domid; 343 343 grant_ref_t ref; 344 344 /* OUT parameters. */ ··· 375 375 struct { 376 376 union { 377 377 grant_ref_t ref; 378 - unsigned long gmfn; 378 + xen_pfn_t gmfn; 379 379 } u; 380 380 domid_t domid; 381 381 uint16_t offset; ··· 519 519 #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ 520 520 #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ 521 521 #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ 522 - #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */ 522 + #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ 523 + #define GNTST_address_too_big (-11) /* transfer page address too large. */ 524 + #define GNTST_eagain (-12) /* Operation not done; try again. */ 523 525 524 526 #define GNTTABOP_error_msgs { \ 525 527 "okay", \ ··· 534 532 "no spare translation slot in the I/O MMU", \ 535 533 "permission denied", \ 536 534 "bad page", \ 537 - "copy arguments cross page boundary" \ 535 + "copy arguments cross page boundary", \ 536 + "page address size too large", \ 537 + "operation not done; try again" \ 538 538 } 539 539 540 540 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+6 -3
include/xen/interface/memory.h
··· 31 31 * OUT: GMFN bases of extents that were allocated 32 32 * (NB. This command also updates the mach_to_phys translation table) 33 33 */ 34 - GUEST_HANDLE(ulong) extent_start; 34 + GUEST_HANDLE(xen_pfn_t) extent_start; 35 35 36 36 /* Number of extents, and size/alignment of each (2^extent_order pages). */ 37 37 unsigned long nr_extents; ··· 130 130 * any large discontiguities in the machine address space, 2MB gaps in 131 131 * the machphys table will be represented by an MFN base of zero. 132 132 */ 133 - GUEST_HANDLE(ulong) extent_start; 133 + GUEST_HANDLE(xen_pfn_t) extent_start; 134 134 135 135 /* 136 136 * Number of extents written to the above array. This will be smaller ··· 163 163 /* Which domain to change the mapping for. */ 164 164 domid_t domid; 165 165 166 + /* Number of pages to go through for gmfn_range */ 167 + uint16_t size; 168 + 166 169 /* Source mapping space. */ 167 170 #define XENMAPSPACE_shared_info 0 /* shared info page */ 168 171 #define XENMAPSPACE_grant_table 1 /* grant table page */ ··· 175 172 unsigned long idx; 176 173 177 174 /* GPFN where the source mapping page should appear. */ 178 - unsigned long gpfn; 175 + xen_pfn_t gpfn; 179 176 }; 180 177 DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); 181 178
+5 -2
include/xen/interface/platform.h
··· 54 54 #define XENPF_add_memtype 31 55 55 struct xenpf_add_memtype { 56 56 /* IN variables. */ 57 - unsigned long mfn; 57 + xen_pfn_t mfn; 58 58 uint64_t nr_mfns; 59 59 uint32_t type; 60 60 /* OUT variables. */ ··· 84 84 /* IN variables. */ 85 85 uint32_t reg; 86 86 /* OUT variables. */ 87 - unsigned long mfn; 87 + xen_pfn_t mfn; 88 88 uint64_t nr_mfns; 89 89 uint32_t type; 90 90 }; ··· 112 112 #define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ 113 113 #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ 114 114 #define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ 115 + #define XEN_FW_KBD_SHIFT_FLAGS 5 /* Int16, Fn02: Get keyboard shift flags. */ 115 116 struct xenpf_firmware_info { 116 117 /* IN variables. */ 117 118 uint32_t type; ··· 143 142 /* must refer to 128-byte buffer */ 144 143 GUEST_HANDLE(uchar) edid; 145 144 } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ 145 + 146 + uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ 146 147 } u; 147 148 }; 148 149 DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
+3
include/xen/interface/version.h
··· 60 60 /* arg == NULL; returns host memory page size. */ 61 61 #define XENVER_pagesize 7 62 62 63 + /* arg == xen_domain_handle_t. */ 64 + #define XENVER_guest_handle 8 65 + 63 66 #endif /* __XEN_PUBLIC_VERSION_H__ */
+4 -4
include/xen/interface/xen.h
··· 10 10 #define __XEN_PUBLIC_XEN_H__ 11 11 12 12 #include <asm/xen/interface.h> 13 - #include <asm/pvclock-abi.h> 14 13 15 14 /* 16 15 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). ··· 189 190 unsigned int cmd; 190 191 union { 191 192 /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ 192 - unsigned long mfn; 193 + xen_pfn_t mfn; 193 194 /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ 194 195 unsigned long linear_addr; 195 196 } arg1; ··· 429 430 unsigned long nr_pages; /* Total pages allocated to this domain. */ 430 431 unsigned long shared_info; /* MACHINE address of shared info struct. */ 431 432 uint32_t flags; /* SIF_xxx flags. */ 432 - unsigned long store_mfn; /* MACHINE page number of shared page. */ 433 + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ 433 434 uint32_t store_evtchn; /* Event channel for store communication. */ 434 435 union { 435 436 struct { 436 - unsigned long mfn; /* MACHINE page number of console page. */ 437 + xen_pfn_t mfn; /* MACHINE page number of console page. */ 437 438 uint32_t evtchn; /* Event channel for console page. */ 438 439 } domU; 439 440 struct { ··· 454 455 uint8_t video_type; 455 456 #define XEN_VGATYPE_TEXT_MODE_3 0x03 456 457 #define XEN_VGATYPE_VESA_LFB 0x23 458 + #define XEN_VGATYPE_EFI_LFB 0x70 457 459 458 460 union { 459 461 struct {
+24 -3
include/xen/privcmd.h
··· 35 35 36 36 #include <linux/types.h> 37 37 #include <linux/compiler.h> 38 - 39 - typedef unsigned long xen_pfn_t; 38 + #include <xen/interface/xen.h> 40 39 41 40 struct privcmd_hypercall { 42 41 __u64 op; ··· 58 59 int num; /* number of pages to populate */ 59 60 domid_t dom; /* target domain */ 60 61 __u64 addr; /* virtual address */ 61 - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ 62 + xen_pfn_t __user *arr; /* array of mfns - or'd with 63 + PRIVCMD_MMAPBATCH_*_ERROR on err */ 64 + }; 65 + 66 + #define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U 67 + #define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U 68 + 69 + struct privcmd_mmapbatch_v2 { 70 + unsigned int num; /* number of pages to populate */ 71 + domid_t dom; /* target domain */ 72 + __u64 addr; /* virtual address */ 73 + const xen_pfn_t __user *arr; /* array of mfns */ 74 + int __user *err; /* array of error codes */ 62 75 }; 63 76 64 77 /* 65 78 * @cmd: IOCTL_PRIVCMD_HYPERCALL 66 79 * @arg: &privcmd_hypercall_t 67 80 * Return: Value returned from execution of the specified hypercall. 81 + * 82 + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2 83 + * @arg: &struct privcmd_mmapbatch_v2 84 + * Return: 0 on success (i.e., arg->err contains valid error codes for 85 + * each frame). On an error other than a failed frame remap, -1 is 86 + * returned and errno is set to EINVAL, EFAULT etc. As an exception, 87 + * if the operation was otherwise successful but any frame failed with 88 + * -ENOENT, then -1 is returned and errno is set to ENOENT. 68 89 */ 69 90 #define IOCTL_PRIVCMD_HYPERCALL \ 70 91 _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) ··· 92 73 _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) 93 74 #define IOCTL_PRIVCMD_MMAPBATCH \ 94 75 _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) 76 + #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ 77 + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) 95 78 96 79 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+1 -10
include/xen/swiotlb-xen.h
··· 3 3 4 4 #include <linux/swiotlb.h> 5 5 6 - extern void xen_swiotlb_init(int verbose); 6 + extern int xen_swiotlb_init(int verbose, bool early); 7 7 8 8 extern void 9 9 *xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, ··· 23 23 extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 24 24 size_t size, enum dma_data_direction dir, 25 25 struct dma_attrs *attrs); 26 - /* 27 - extern int 28 - xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, 29 - enum dma_data_direction dir); 30 - 31 - extern void 32 - xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, 33 - enum dma_data_direction dir); 34 - */ 35 26 extern int 36 27 xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, 37 28 int nelems, enum dma_data_direction dir,
+24 -9
lib/swiotlb.c
··· 170 170 * Statically reserve bounce buffer space and initialize bounce buffer data 171 171 * structures for the software IO TLB used to implement the DMA API. 172 172 */ 173 - void __init 173 + static void __init 174 174 swiotlb_init_with_default_size(size_t default_size, int verbose) 175 175 { 176 176 unsigned long bytes; ··· 206 206 int 207 207 swiotlb_late_init_with_default_size(size_t default_size) 208 208 { 209 - unsigned long i, bytes, req_nslabs = io_tlb_nslabs; 209 + unsigned long bytes, req_nslabs = io_tlb_nslabs; 210 210 unsigned int order; 211 + int rc = 0; 211 212 212 213 if (!io_tlb_nslabs) { 213 214 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); ··· 230 229 order--; 231 230 } 232 231 233 - if (!io_tlb_start) 234 - goto cleanup1; 235 - 232 + if (!io_tlb_start) { 233 + io_tlb_nslabs = req_nslabs; 234 + return -ENOMEM; 235 + } 236 236 if (order != get_order(bytes)) { 237 237 printk(KERN_WARNING "Warning: only able to allocate %ld MB " 238 238 "for software IO TLB\n", (PAGE_SIZE << order) >> 20); 239 239 io_tlb_nslabs = SLABS_PER_PAGE << order; 240 - bytes = io_tlb_nslabs << IO_TLB_SHIFT; 241 240 } 241 + rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs); 242 + if (rc) 243 + free_pages((unsigned long)io_tlb_start, order); 244 + return rc; 245 + } 246 + 247 + int 248 + swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) 249 + { 250 + unsigned long i, bytes; 251 + 252 + bytes = nslabs << IO_TLB_SHIFT; 253 + 254 + io_tlb_nslabs = nslabs; 255 + io_tlb_start = tlb; 242 256 io_tlb_end = io_tlb_start + bytes; 257 + 243 258 memset(io_tlb_start, 0, bytes); 244 259 245 260 /* ··· 305 288 io_tlb_list = NULL; 306 289 cleanup2: 307 290 io_tlb_end = NULL; 308 - free_pages((unsigned long)io_tlb_start, order); 309 291 io_tlb_start = NULL; 310 - cleanup1: 311 - io_tlb_nslabs = req_nslabs; 292 + io_tlb_nslabs = 0; 312 293 return -ENOMEM; 313 294 } 314 295