
Merge remote-tracking branch 'origin/x86/mm' into x86/mm2

Explicitly merging these two branches due to nontrivial conflicts and
to allow further work.

Resolved Conflicts:
arch/x86/kernel/head32.c
arch/x86/kernel/head64.c
arch/x86/mm/init_64.c
arch/x86/realmode/init.c

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

+199 -287
-4
arch/x86/Kconfig
··· 1253 1253 Specify the maximum number of NUMA Nodes available on the target 1254 1254 system. Increases memory reserved to accommodate various tables. 1255 1255 1256 - config HAVE_ARCH_ALLOC_REMAP 1257 - def_bool y 1258 - depends on X86_32 && NUMA 1259 - 1260 1256 config ARCH_HAVE_MEMORY_PRESENT 1261 1257 def_bool y 1262 1258 depends on X86_32 && DISCONTIGMEM
-6
arch/x86/include/asm/mmzone_32.h
··· 14 14 15 15 #include <asm/numaq.h> 16 16 17 - extern void resume_map_numa_kva(pgd_t *pgd); 18 - 19 - #else /* !CONFIG_NUMA */ 20 - 21 - static inline void resume_map_numa_kva(pgd_t *pgd) {} 22 - 23 17 #endif /* CONFIG_NUMA */ 24 18 25 19 #ifdef CONFIG_DISCONTIGMEM
+2 -1
arch/x86/include/asm/page.h
··· 48 48 * case properly. Once all supported versions of gcc understand it, we can 49 49 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) 50 50 */ 51 - #define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) 51 + #define __pa_symbol(x) \ 52 + __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) 52 53 53 54 #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 54 55
+1
arch/x86/include/asm/page_32.h
··· 15 15 #else 16 16 #define __phys_addr(x) __phys_addr_nodebug(x) 17 17 #endif 18 + #define __phys_addr_symbol(x) __phys_addr(x) 18 19 #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) 19 20 20 21 #ifdef CONFIG_FLATMEM
+36
arch/x86/include/asm/page_64.h
··· 3 3 4 4 #include <asm/page_64_types.h> 5 5 6 + #ifndef __ASSEMBLY__ 7 + 8 + /* duplicated to the one in bootmem.h */ 9 + extern unsigned long max_pfn; 10 + extern unsigned long phys_base; 11 + 12 + static inline unsigned long __phys_addr_nodebug(unsigned long x) 13 + { 14 + unsigned long y = x - __START_KERNEL_map; 15 + 16 + /* use the carry flag to determine if x was < __START_KERNEL_map */ 17 + x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); 18 + 19 + return x; 20 + } 21 + 22 + #ifdef CONFIG_DEBUG_VIRTUAL 23 + extern unsigned long __phys_addr(unsigned long); 24 + extern unsigned long __phys_addr_symbol(unsigned long); 25 + #else 26 + #define __phys_addr(x) __phys_addr_nodebug(x) 27 + #define __phys_addr_symbol(x) \ 28 + ((unsigned long)(x) - __START_KERNEL_map + phys_base) 29 + #endif 30 + 31 + #define __phys_reloc_hide(x) (x) 32 + 33 + #ifdef CONFIG_FLATMEM 34 + #define pfn_valid(pfn) ((pfn) < max_pfn) 35 + #endif 36 + 37 + void clear_page(void *page); 38 + void copy_page(void *to, void *from); 39 + 40 + #endif /* !__ASSEMBLY__ */ 41 + 6 42 #endif /* _ASM_X86_PAGE_64_H */
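
The open-coded subtraction plus the (x > y) test is the carry-flag trick the comment refers to: y = x - __START_KERNEL_map wraps around (and ends up above x) exactly when x was below __START_KERNEL_map, so the comparison picks between the kernel-image offset (phys_base) and the direct-map offset without a separate compare against the 64-bit constant. Below is a minimal user-space sketch of the same arithmetic; the constants mirror the usual x86-64 layout but are stand-ins here, not values read from a running kernel.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's __START_KERNEL_map, PAGE_OFFSET
 * and phys_base (phys_base is 0 unless the kernel image was relocated). */
#define START_KERNEL_MAP 0xffffffff80000000UL
#define PAGE_OFFSET_DM   0xffff880000000000UL
static unsigned long phys_base = 0;

/* Same arithmetic as the new __phys_addr_nodebug(): one subtraction,
 * then (x > y) stands in for "x >= __START_KERNEL_map". */
static unsigned long phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - START_KERNEL_MAP;

	return y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET_DM));
}

int main(void)
{
	/* A kernel-image address and a direct-map address, both hypothetical. */
	printf("%#lx\n", phys_addr_nodebug(0xffffffff81000000UL)); /* prints 0x1000000 */
	printf("%#lx\n", phys_addr_nodebug(0xffff880001234000UL)); /* prints 0x1234000 */
	return 0;
}
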
-22
arch/x86/include/asm/page_64_types.h
··· 50 50 #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) 51 51 #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) 52 52 53 - #ifndef __ASSEMBLY__ 54 - void clear_page(void *page); 55 - void copy_page(void *to, void *from); 56 - 57 - /* duplicated to the one in bootmem.h */ 58 - extern unsigned long max_pfn; 59 - extern unsigned long phys_base; 60 - 61 - extern unsigned long __phys_addr(unsigned long); 62 - #define __phys_reloc_hide(x) (x) 63 - 64 - #define vmemmap ((struct page *)VMEMMAP_START) 65 - 66 - extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); 67 - extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); 68 - 69 - #endif /* !__ASSEMBLY__ */ 70 - 71 - #ifdef CONFIG_FLATMEM 72 - #define pfn_valid(pfn) ((pfn) < max_pfn) 73 - #endif 74 - 75 53 #endif /* _ASM_X86_PAGE_64_DEFS_H */
+14
arch/x86/include/asm/pgtable.h
··· 390 390 391 391 #ifndef __ASSEMBLY__ 392 392 #include <linux/mm_types.h> 393 + #include <linux/log2.h> 393 394 394 395 static inline int pte_none(pte_t pte) 395 396 { ··· 784 783 memcpy(dst, src, count * sizeof(pgd_t)); 785 784 } 786 785 786 + #define PTE_SHIFT ilog2(PTRS_PER_PTE) 787 + static inline int page_level_shift(enum pg_level level) 788 + { 789 + return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; 790 + } 791 + static inline unsigned long page_level_size(enum pg_level level) 792 + { 793 + return 1UL << page_level_shift(level); 794 + } 795 + static inline unsigned long page_level_mask(enum pg_level level) 796 + { 797 + return ~(page_level_size(level) - 1); 798 + } 787 799 788 800 #include <asm-generic/pgtable.h> 789 801 #endif /* __ASSEMBLY__ */
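
With PTRS_PER_PTE equal to 512, PTE_SHIFT is 9, so PG_LEVEL_4K covers the 12-bit page offset and each level above it adds another 9 bits: 4 KiB, 2 MiB and 1 GiB for the three levels. A self-contained sketch of the same helpers follows; the enum mirrors pgtable_types.h and PAGE_SHIFT/PTE_SHIFT are assumed constants, not pulled from kernel headers.

#include <stdio.h>

/* Mirrors the enum in pgtable_types.h (PG_LEVEL_NONE = 0, 4K = 1, ...). */
enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

#define PAGE_SHIFT 12	/* usual x86 page size */
#define PTE_SHIFT  9	/* ilog2(PTRS_PER_PTE) with PTRS_PER_PTE == 512 */

static int page_level_shift(enum pg_level level)
{
	return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
}

static unsigned long page_level_size(enum pg_level level)
{
	return 1UL << page_level_shift(level);
}

static unsigned long page_level_mask(enum pg_level level)
{
	return ~(page_level_size(level) - 1);
}

int main(void)
{
	/* 4096, 2097152 and 1073741824 bytes respectively. */
	printf("%lu %lu %lu\n", page_level_size(PG_LEVEL_4K),
	       page_level_size(PG_LEVEL_2M), page_level_size(PG_LEVEL_1G));
	printf("%#lx\n", page_level_mask(PG_LEVEL_2M)); /* 0xffffffffffe00000 */
	return 0;
}
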
+5
arch/x86/include/asm/pgtable_64.h
··· 183 183 184 184 #define __HAVE_ARCH_PTE_SAME 185 185 186 + #define vmemmap ((struct page *)VMEMMAP_START) 187 + 188 + extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); 189 + extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); 190 + 186 191 #endif /* !__ASSEMBLY__ */ 187 192 188 193 #endif /* _ASM_X86_PGTABLE_64_H */
+2 -1
arch/x86/include/asm/pgtable_types.h
··· 330 330 struct seq_file; 331 331 extern void arch_report_meminfo(struct seq_file *m); 332 332 333 - enum { 333 + enum pg_level { 334 334 PG_LEVEL_NONE, 335 335 PG_LEVEL_4K, 336 336 PG_LEVEL_2M, ··· 351 351 * as a pte too. 352 352 */ 353 353 extern pte_t *lookup_address(unsigned long address, unsigned int *level); 354 + extern phys_addr_t slow_virt_to_phys(void *__address); 354 355 355 356 #endif /* !__ASSEMBLY__ */ 356 357
+1 -1
arch/x86/kernel/acpi/sleep.c
··· 69 69 70 70 #ifndef CONFIG_64BIT 71 71 header->pmode_entry = (u32)&wakeup_pmode_return; 72 - header->pmode_cr3 = (u32)__pa(&initial_page_table); 72 + header->pmode_cr3 = (u32)__pa_symbol(initial_page_table); 73 73 saved_magic = 0x12345678; 74 74 #else /* CONFIG_64BIT */ 75 75 #ifdef CONFIG_SMP
+1
arch/x86/kernel/apic/apic_numachip.c
··· 28 28 #include <asm/apic.h> 29 29 #include <asm/ipi.h> 30 30 #include <asm/apic_flat_64.h> 31 + #include <asm/pgtable.h> 31 32 32 33 static int numachip_system __read_mostly; 33 34
+1 -1
arch/x86/kernel/cpu/intel.c
··· 167 167 #ifdef CONFIG_X86_F00F_BUG 168 168 static void __cpuinit trap_init_f00f_bug(void) 169 169 { 170 - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 170 + __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); 171 171 172 172 /* 173 173 * Update the IDT descriptor and reload the IDT so that
+2 -2
arch/x86/kernel/ftrace.c
··· 89 89 * kernel identity mapping to modify code. 90 90 */ 91 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 92 - ip = (unsigned long)__va(__pa(ip)); 92 + ip = (unsigned long)__va(__pa_symbol(ip)); 93 93 94 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); 95 95 } ··· 279 279 * kernel identity mapping to modify code. 280 280 */ 281 281 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 282 - ip = (unsigned long)__va(__pa(ip)); 282 + ip = (unsigned long)__va(__pa_symbol(ip)); 283 283 284 284 return probe_kernel_write((void *)ip, val, size); 285 285 }
+5 -4
arch/x86/kernel/kvm.c
··· 297 297 298 298 memset(st, 0, sizeof(*st)); 299 299 300 - wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); 300 + wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); 301 301 printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", 302 - cpu, __pa(st)); 302 + cpu, slow_virt_to_phys(st)); 303 303 } 304 304 305 305 static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; ··· 324 324 return; 325 325 326 326 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { 327 - u64 pa = __pa(&__get_cpu_var(apf_reason)); 327 + u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); 328 328 329 329 #ifdef CONFIG_PREEMPT 330 330 pa |= KVM_ASYNC_PF_SEND_ALWAYS; ··· 340 340 /* Size alignment is implied but just to make it explicit. */ 341 341 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); 342 342 __get_cpu_var(kvm_apic_eoi) = 0; 343 - pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; 343 + pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) 344 + | KVM_MSR_ENABLED; 344 345 wrmsrl(MSR_KVM_PV_EOI_EN, pa); 345 346 } 346 347
+2 -2
arch/x86/kernel/kvmclock.c
··· 162 162 int low, high, ret; 163 163 struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; 164 164 165 - low = (int)__pa(src) | 1; 166 - high = ((u64)__pa(src) >> 32); 165 + low = (int)slow_virt_to_phys(src) | 1; 166 + high = ((u64)slow_virt_to_phys(src) >> 32); 167 167 ret = native_write_msr_safe(msr_kvm_system_time, low, high); 168 168 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 169 169 cpu, high, low, txt);
+8 -8
arch/x86/kernel/setup.c
··· 284 284 static void __init reserve_brk(void) 285 285 { 286 286 if (_brk_end > _brk_start) 287 - memblock_reserve(__pa(_brk_start), 288 - __pa(_brk_end) - __pa(_brk_start)); 287 + memblock_reserve(__pa_symbol(_brk_start), 288 + _brk_end - _brk_start); 289 289 290 290 /* Mark brk area as locked down and no longer taking any 291 291 new allocations */ ··· 903 903 init_mm.end_data = (unsigned long) _edata; 904 904 init_mm.brk = _brk_end; 905 905 906 - code_resource.start = virt_to_phys(_text); 907 - code_resource.end = virt_to_phys(_etext)-1; 908 - data_resource.start = virt_to_phys(_etext); 909 - data_resource.end = virt_to_phys(_edata)-1; 910 - bss_resource.start = virt_to_phys(&__bss_start); 911 - bss_resource.end = virt_to_phys(&__bss_stop)-1; 906 + code_resource.start = __pa_symbol(_text); 907 + code_resource.end = __pa_symbol(_etext)-1; 908 + data_resource.start = __pa_symbol(_etext); 909 + data_resource.end = __pa_symbol(_edata)-1; 910 + bss_resource.start = __pa_symbol(__bss_start); 911 + bss_resource.end = __pa_symbol(__bss_stop)-1; 912 912 913 913 #ifdef CONFIG_CMDLINE_BOOL 914 914 #ifdef CONFIG_CMDLINE_OVERRIDE
+3
arch/x86/kernel/x8664_ksyms_64.c
··· 59 59 EXPORT_SYMBOL(__memcpy); 60 60 EXPORT_SYMBOL(memmove); 61 61 62 + #ifndef CONFIG_DEBUG_VIRTUAL 63 + EXPORT_SYMBOL(phys_base); 64 + #endif 62 65 EXPORT_SYMBOL(empty_zero_page); 63 66 #ifndef CONFIG_PARAVIRT 64 67 EXPORT_SYMBOL(native_load_gs_index);
+2 -1
arch/x86/lguest/boot.c
··· 552 552 current_cr3 = cr3; 553 553 554 554 /* These two page tables are simple, linear, and used during boot */ 555 - if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) 555 + if (cr3 != __pa_symbol(swapper_pg_dir) && 556 + cr3 != __pa_symbol(initial_page_table)) 556 557 cr3_changed = true; 557 558 } 558 559
+8 -10
arch/x86/mm/init_64.c
··· 804 804 void mark_rodata_ro(void) 805 805 { 806 806 unsigned long start = PFN_ALIGN(_text); 807 - unsigned long rodata_start = 808 - ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 807 + unsigned long rodata_start = PFN_ALIGN(__start_rodata); 809 808 unsigned long end = (unsigned long) &__end_rodata_hpage_align; 810 - unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); 811 - unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); 812 - unsigned long data_start = (unsigned long) &_sdata; 809 + unsigned long text_end = PFN_ALIGN(&__stop___ex_table); 810 + unsigned long rodata_end = PFN_ALIGN(&__end_rodata); 813 811 unsigned long all_end = PFN_ALIGN(&_end); 814 812 815 813 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", ··· 833 835 #endif 834 836 835 837 free_init_pages("unused kernel memory", 836 - (unsigned long) page_address(virt_to_page(text_end)), 837 - (unsigned long) 838 - page_address(virt_to_page(rodata_start))); 838 + (unsigned long) __va(__pa_symbol(text_end)), 839 + (unsigned long) __va(__pa_symbol(rodata_start))); 840 + 839 841 free_init_pages("unused kernel memory", 840 - (unsigned long) page_address(virt_to_page(rodata_end)), 841 - (unsigned long) page_address(virt_to_page(data_start))); 842 + (unsigned long) __va(__pa_symbol(rodata_end)), 843 + (unsigned long) __va(__pa_symbol(_sdata))); 842 844 } 843 845 844 846 #endif
+11 -21
arch/x86/mm/numa.c
··· 193 193 static void __init setup_node_data(int nid, u64 start, u64 end) 194 194 { 195 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 196 - bool remapped = false; 197 196 u64 nd_pa; 198 197 void *nd; 199 198 int tnid; ··· 204 205 if (end && (end - start) < NODE_MIN_SIZE) 205 206 return; 206 207 207 - /* initialize remap allocator before aligning to ZONE_ALIGN */ 208 - init_alloc_remap(nid, start, end); 209 - 210 208 start = roundup(start, ZONE_ALIGN); 211 209 212 210 printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", 213 211 nid, start, end - 1); 214 212 215 213 /* 216 - * Allocate node data. Try remap allocator first, node-local 217 - * memory and then any node. Never allocate in DMA zone. 214 + * Allocate node data. Try node-local memory and then any node. 215 + * Never allocate in DMA zone. 218 216 */ 219 - nd = alloc_remap(nid, nd_size); 220 - if (nd) { 221 - nd_pa = __pa(nd); 222 - remapped = true; 223 - } else { 224 - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); 225 - if (!nd_pa) { 226 - pr_err("Cannot find %zu bytes in node %d\n", 227 - nd_size, nid); 228 - return; 229 - } 230 - nd = __va(nd_pa); 217 + nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); 218 + if (!nd_pa) { 219 + pr_err("Cannot find %zu bytes in node %d\n", 220 + nd_size, nid); 221 + return; 231 222 } 223 + nd = __va(nd_pa); 232 224 233 225 /* report and initialize */ 234 - printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", 235 - nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); 226 + printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", 227 + nd_pa, nd_pa + nd_size - 1); 236 228 tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); 237 - if (!remapped && tnid != nid) 229 + if (tnid != nid) 238 230 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); 239 231 240 232 node_data[nid] = nd;
-161
arch/x86/mm/numa_32.c
··· 73 73 74 74 extern unsigned long highend_pfn, highstart_pfn; 75 75 76 - #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) 77 - 78 - static void *node_remap_start_vaddr[MAX_NUMNODES]; 79 - void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 80 - 81 - /* 82 - * Remap memory allocator 83 - */ 84 - static unsigned long node_remap_start_pfn[MAX_NUMNODES]; 85 - static void *node_remap_end_vaddr[MAX_NUMNODES]; 86 - static void *node_remap_alloc_vaddr[MAX_NUMNODES]; 87 - 88 - /** 89 - * alloc_remap - Allocate remapped memory 90 - * @nid: NUMA node to allocate memory from 91 - * @size: The size of allocation 92 - * 93 - * Allocate @size bytes from the remap area of NUMA node @nid. The 94 - * size of the remap area is predetermined by init_alloc_remap() and 95 - * only the callers considered there should call this function. For 96 - * more info, please read the comment on top of init_alloc_remap(). 97 - * 98 - * The caller must be ready to handle allocation failure from this 99 - * function and fall back to regular memory allocator in such cases. 100 - * 101 - * CONTEXT: 102 - * Single CPU early boot context. 103 - * 104 - * RETURNS: 105 - * Pointer to the allocated memory on success, %NULL on failure. 106 - */ 107 - void *alloc_remap(int nid, unsigned long size) 108 - { 109 - void *allocation = node_remap_alloc_vaddr[nid]; 110 - 111 - size = ALIGN(size, L1_CACHE_BYTES); 112 - 113 - if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) 114 - return NULL; 115 - 116 - node_remap_alloc_vaddr[nid] += size; 117 - memset(allocation, 0, size); 118 - 119 - return allocation; 120 - } 121 - 122 - #ifdef CONFIG_HIBERNATION 123 - /** 124 - * resume_map_numa_kva - add KVA mapping to the temporary page tables created 125 - * during resume from hibernation 126 - * @pgd_base - temporary resume page directory 127 - */ 128 - void resume_map_numa_kva(pgd_t *pgd_base) 129 - { 130 - int node; 131 - 132 - for_each_online_node(node) { 133 - unsigned long start_va, start_pfn, nr_pages, pfn; 134 - 135 - start_va = (unsigned long)node_remap_start_vaddr[node]; 136 - start_pfn = node_remap_start_pfn[node]; 137 - nr_pages = (node_remap_end_vaddr[node] - 138 - node_remap_start_vaddr[node]) >> PAGE_SHIFT; 139 - 140 - printk(KERN_DEBUG "%s: node %d\n", __func__, node); 141 - 142 - for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { 143 - unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); 144 - pgd_t *pgd = pgd_base + pgd_index(vaddr); 145 - pud_t *pud = pud_offset(pgd, vaddr); 146 - pmd_t *pmd = pmd_offset(pud, vaddr); 147 - 148 - set_pmd(pmd, pfn_pmd(start_pfn + pfn, 149 - PAGE_KERNEL_LARGE_EXEC)); 150 - 151 - printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", 152 - __func__, vaddr, start_pfn + pfn); 153 - } 154 - } 155 - } 156 - #endif 157 - 158 - /** 159 - * init_alloc_remap - Initialize remap allocator for a NUMA node 160 - * @nid: NUMA node to initizlie remap allocator for 161 - * 162 - * NUMA nodes may end up without any lowmem. As allocating pgdat and 163 - * memmap on a different node with lowmem is inefficient, a special 164 - * remap allocator is implemented which can be used by alloc_remap(). 165 - * 166 - * For each node, the amount of memory which will be necessary for 167 - * pgdat and memmap is calculated and two memory areas of the size are 168 - * allocated - one in the node and the other in lowmem; then, the area 169 - * in the node is remapped to the lowmem area. 
170 - * 171 - * As pgdat and memmap must be allocated in lowmem anyway, this 172 - * doesn't waste lowmem address space; however, the actual lowmem 173 - * which gets remapped over is wasted. The amount shouldn't be 174 - * problematic on machines this feature will be used. 175 - * 176 - * Initialization failure isn't fatal. alloc_remap() is used 177 - * opportunistically and the callers will fall back to other memory 178 - * allocation mechanisms on failure. 179 - */ 180 - void __init init_alloc_remap(int nid, u64 start, u64 end) 181 - { 182 - unsigned long start_pfn = start >> PAGE_SHIFT; 183 - unsigned long end_pfn = end >> PAGE_SHIFT; 184 - unsigned long size, pfn; 185 - u64 node_pa, remap_pa; 186 - void *remap_va; 187 - 188 - /* 189 - * The acpi/srat node info can show hot-add memroy zones where 190 - * memory could be added but not currently present. 191 - */ 192 - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", 193 - nid, start_pfn, end_pfn); 194 - 195 - /* calculate the necessary space aligned to large page size */ 196 - size = node_memmap_size_bytes(nid, start_pfn, end_pfn); 197 - size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); 198 - size = ALIGN(size, LARGE_PAGE_BYTES); 199 - 200 - /* allocate node memory and the lowmem remap area */ 201 - node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); 202 - if (!node_pa) { 203 - pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", 204 - size, nid); 205 - return; 206 - } 207 - memblock_reserve(node_pa, size); 208 - 209 - remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, 210 - max_low_pfn << PAGE_SHIFT, 211 - size, LARGE_PAGE_BYTES); 212 - if (!remap_pa) { 213 - pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", 214 - size, nid); 215 - memblock_free(node_pa, size); 216 - return; 217 - } 218 - memblock_reserve(remap_pa, size); 219 - remap_va = phys_to_virt(remap_pa); 220 - 221 - /* perform actual remap */ 222 - for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) 223 - set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), 224 - (node_pa >> PAGE_SHIFT) + pfn, 225 - PAGE_KERNEL_LARGE); 226 - 227 - /* initialize remap allocator parameters */ 228 - node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; 229 - node_remap_start_vaddr[nid] = remap_va; 230 - node_remap_end_vaddr[nid] = remap_va + size; 231 - node_remap_alloc_vaddr[nid] = remap_va; 232 - 233 - printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", 234 - nid, node_pa, node_pa + size, remap_va, remap_va + size); 235 - } 236 - 237 76 void __init initmem_init(void) 238 77 { 239 78 x86_numa_init();
-6
arch/x86/mm/numa_internal.h
··· 21 21 22 22 void __init x86_numa_init(void); 23 23 24 - #ifdef CONFIG_X86_64 25 - static inline void init_alloc_remap(int nid, u64 start, u64 end) { } 26 - #else 27 - void __init init_alloc_remap(int nid, u64 start, u64 end); 28 - #endif 29 - 30 24 #ifdef CONFIG_NUMA_EMU 31 25 void __init numa_emulation(struct numa_meminfo *numa_meminfo, 32 26 int numa_dist_cnt);
+39 -11
arch/x86/mm/pageattr.c
··· 94 94 95 95 static inline unsigned long highmap_start_pfn(void) 96 96 { 97 - return __pa(_text) >> PAGE_SHIFT; 97 + return __pa_symbol(_text) >> PAGE_SHIFT; 98 98 } 99 99 100 100 static inline unsigned long highmap_end_pfn(void) 101 101 { 102 - return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; 102 + return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; 103 103 } 104 104 105 105 #endif ··· 276 276 * The .rodata section needs to be read-only. Using the pfn 277 277 * catches all aliases. 278 278 */ 279 - if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 280 - __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 279 + if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, 280 + __pa_symbol(__end_rodata) >> PAGE_SHIFT)) 281 281 pgprot_val(forbidden) |= _PAGE_RW; 282 282 283 283 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) ··· 364 364 EXPORT_SYMBOL_GPL(lookup_address); 365 365 366 366 /* 367 + * This is necessary because __pa() does not work on some 368 + * kinds of memory, like vmalloc() or the alloc_remap() 369 + * areas on 32-bit NUMA systems. The percpu areas can 370 + * end up in this kind of memory, for instance. 371 + * 372 + * This could be optimized, but it is only intended to be 373 + * used at inititalization time, and keeping it 374 + * unoptimized should increase the testing coverage for 375 + * the more obscure platforms. 376 + */ 377 + phys_addr_t slow_virt_to_phys(void *__virt_addr) 378 + { 379 + unsigned long virt_addr = (unsigned long)__virt_addr; 380 + phys_addr_t phys_addr; 381 + unsigned long offset; 382 + enum pg_level level; 383 + unsigned long psize; 384 + unsigned long pmask; 385 + pte_t *pte; 386 + 387 + pte = lookup_address(virt_addr, &level); 388 + BUG_ON(!pte); 389 + psize = page_level_size(level); 390 + pmask = page_level_mask(level); 391 + offset = virt_addr & ~pmask; 392 + phys_addr = pte_pfn(*pte) << PAGE_SHIFT; 393 + return (phys_addr | offset); 394 + } 395 + EXPORT_SYMBOL_GPL(slow_virt_to_phys); 396 + 397 + /* 367 398 * Set the new pmd in all the pgds we know about: 368 399 */ 369 400 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) ··· 427 396 pte_t new_pte, old_pte, *tmp; 428 397 pgprot_t old_prot, new_prot, req_prot; 429 398 int i, do_split = 1; 430 - unsigned int level; 399 + enum pg_level level; 431 400 432 401 if (cpa->force_split) 433 402 return 1; ··· 443 412 444 413 switch (level) { 445 414 case PG_LEVEL_2M: 446 - psize = PMD_PAGE_SIZE; 447 - pmask = PMD_PAGE_MASK; 448 - break; 449 415 #ifdef CONFIG_X86_64 450 416 case PG_LEVEL_1G: 451 - psize = PUD_PAGE_SIZE; 452 - pmask = PUD_PAGE_MASK; 453 - break; 454 417 #endif 418 + psize = page_level_size(level); 419 + pmask = page_level_mask(level); 420 + break; 455 421 default: 456 422 do_split = -EINVAL; 457 423 goto out_unlock;
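
Once lookup_address() has found the mapping, slow_virt_to_phys() rebuilds the physical address from two halves: the frame number stored in the (possibly huge) PTE, and the bits of the virtual address that fall below that mapping level's size. A purely illustrative calculation, with made-up values for the virtual address and pfn and a 2 MiB mapping level assumed:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical numbers, only to show the final recombination:
	 * lookup_address() is assumed to have reported a 2 MiB mapping
	 * (PG_LEVEL_2M) whose PTE carries pfn 0x12200. */
	unsigned long virt_addr = 0xffffc90000234567UL; /* e.g. a vmalloc address */
	unsigned long pfn       = 0x12200UL;            /* from pte_pfn(*pte)     */
	unsigned long pmask     = ~((1UL << 21) - 1);    /* page_level_mask(2M)    */
	unsigned long offset    = virt_addr & ~pmask;    /* bits below 2 MiB       */
	unsigned long phys      = (pfn << 12) | offset;  /* 0x12234567             */

	printf("phys = %#lx\n", phys);
	return 0;
}
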
+2 -2
arch/x86/mm/pat.c
··· 560 560 { 561 561 unsigned long id_sz; 562 562 563 - if (base >= __pa(high_memory)) 563 + if (base > __pa(high_memory-1)) 564 564 return 0; 565 565 566 - id_sz = (__pa(high_memory) < base + size) ? 566 + id_sz = (__pa(high_memory-1) <= base + size) ? 567 567 __pa(high_memory) - base : 568 568 size; 569 569
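
The rewritten checks never hand high_memory itself to __pa(): they translate high_memory - 1, the last byte actually covered by the direct mapping, presumably to keep the new DEBUG_VIRTUAL checks (which can now cross-check against a page-table walk) away from an address one byte past the end of that mapping. For the first test the two forms agree, since base >= __pa(high_memory) is the same condition as base > __pa(high_memory) - 1. A tiny sketch of that boundary, using an assumed 4 GiB direct-map end rather than real kernel values:

#include <stdio.h>

int main(void)
{
	/* Pretend the direct map covers [0, 4 GiB): __pa(high_memory) would be
	 * 0x100000000 and __pa(high_memory - 1) would be 0xffffffff. */
	unsigned long long pa_end  = 0x100000000ULL;  /* one past the last mapped byte */
	unsigned long long pa_last = pa_end - 1;      /* last mapped byte              */
	unsigned long long bases[] = { 0xfff00000ULL, 0x100000000ULL, 0x180000000ULL };

	for (int i = 0; i < 3; i++) {
		int old_check = bases[i] >= pa_end;   /* previous form */
		int new_check = bases[i] >  pa_last;  /* new form      */
		printf("base=%#llx old=%d new=%d\n", bases[i], old_check, new_check);
	}
	return 0;
}
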
+6 -1
arch/x86/mm/pgtable.c
··· 334 334 if (changed && dirty) { 335 335 *pmdp = entry; 336 336 pmd_update_defer(vma->vm_mm, address, pmdp); 337 - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 337 + /* 338 + * We had a write-protection fault here and changed the pmd 339 + * to be more permissive. No need to flush the TLB for that, 340 + * #PF is architecturally guaranteed to do that and in the 341 + * worst-case we'll generate a spurious fault. 342 + */ 338 343 } 339 344 340 345 return changed;
+44 -16
arch/x86/mm/physaddr.c
··· 1 + #include <linux/bootmem.h> 1 2 #include <linux/mmdebug.h> 2 3 #include <linux/module.h> 3 4 #include <linux/mm.h> ··· 9 8 10 9 #ifdef CONFIG_X86_64 11 10 11 + #ifdef CONFIG_DEBUG_VIRTUAL 12 12 unsigned long __phys_addr(unsigned long x) 13 13 { 14 - if (x >= __START_KERNEL_map) { 15 - x -= __START_KERNEL_map; 16 - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); 17 - x += phys_base; 14 + unsigned long y = x - __START_KERNEL_map; 15 + 16 + /* use the carry flag to determine if x was < __START_KERNEL_map */ 17 + if (unlikely(x > y)) { 18 + x = y + phys_base; 19 + 20 + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); 18 21 } else { 19 - VIRTUAL_BUG_ON(x < PAGE_OFFSET); 20 - x -= PAGE_OFFSET; 21 - VIRTUAL_BUG_ON(!phys_addr_valid(x)); 22 + x = y + (__START_KERNEL_map - PAGE_OFFSET); 23 + 24 + /* carry flag will be set if starting x was >= PAGE_OFFSET */ 25 + VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); 22 26 } 27 + 23 28 return x; 24 29 } 25 30 EXPORT_SYMBOL(__phys_addr); 26 31 32 + unsigned long __phys_addr_symbol(unsigned long x) 33 + { 34 + unsigned long y = x - __START_KERNEL_map; 35 + 36 + /* only check upper bounds since lower bounds will trigger carry */ 37 + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); 38 + 39 + return y + phys_base; 40 + } 41 + EXPORT_SYMBOL(__phys_addr_symbol); 42 + #endif 43 + 27 44 bool __virt_addr_valid(unsigned long x) 28 45 { 29 - if (x >= __START_KERNEL_map) { 30 - x -= __START_KERNEL_map; 31 - if (x >= KERNEL_IMAGE_SIZE) 46 + unsigned long y = x - __START_KERNEL_map; 47 + 48 + /* use the carry flag to determine if x was < __START_KERNEL_map */ 49 + if (unlikely(x > y)) { 50 + x = y + phys_base; 51 + 52 + if (y >= KERNEL_IMAGE_SIZE) 32 53 return false; 33 - x += phys_base; 34 54 } else { 35 - if (x < PAGE_OFFSET) 36 - return false; 37 - x -= PAGE_OFFSET; 38 - if (!phys_addr_valid(x)) 55 + x = y + (__START_KERNEL_map - PAGE_OFFSET); 56 + 57 + /* carry flag will be set if starting x was >= PAGE_OFFSET */ 58 + if ((x > y) || !phys_addr_valid(x)) 39 59 return false; 40 60 } 41 61 ··· 69 47 #ifdef CONFIG_DEBUG_VIRTUAL 70 48 unsigned long __phys_addr(unsigned long x) 71 49 { 50 + unsigned long phys_addr = x - PAGE_OFFSET; 72 51 /* VMALLOC_* aren't constants */ 73 52 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 74 53 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); 75 - return x - PAGE_OFFSET; 54 + /* max_low_pfn is set early, but not _that_ early */ 55 + if (max_low_pfn) { 56 + VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); 57 + BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); 58 + } 59 + return phys_addr; 76 60 } 77 61 EXPORT_SYMBOL(__phys_addr); 78 62 #endif
+2 -2
arch/x86/platform/efi/efi.c
··· 410 410 * - Not within any part of the kernel 411 411 * - Not the bios reserved area 412 412 */ 413 - if ((start+size >= virt_to_phys(_text) 414 - && start <= virt_to_phys(_end)) || 413 + if ((start+size >= __pa_symbol(_text) 414 + && start <= __pa_symbol(_end)) || 415 415 !e820_all_mapped(start, start+size, E820_RAM) || 416 416 memblock_is_region_reserved(start, size)) { 417 417 /* Could not reserve, skip it */
-2
arch/x86/power/hibernate_32.c
··· 129 129 } 130 130 } 131 131 132 - resume_map_numa_kva(pgd_base); 133 - 134 132 return 0; 135 133 } 136 134
+2 -2
arch/x86/realmode/init.c
··· 70 70 __va(real_mode_header->trampoline_header); 71 71 72 72 #ifdef CONFIG_X86_32 73 - trampoline_header->start = __pa(startup_32_smp); 73 + trampoline_header->start = __pa_symbol(startup_32_smp); 74 74 trampoline_header->gdt_limit = __BOOT_DS + 7; 75 - trampoline_header->gdt_base = __pa(boot_gdt); 75 + trampoline_header->gdt_base = __pa_symbol(boot_gdt); 76 76 #else 77 77 /* 78 78 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR