Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes and cleanups from Thomas Gleixner:
"This set of updates contains:

- Robustification for the logical package management. Cures the AMD
and virtualization issues.

- Put the correct start_cpu() return address on the stack of the idle
task.

- Fixups for the fallout of the nodeid <-> cpuid persistent mapping
modifications

- Move the x86/MPX specific mm_struct member to the arch specific
mm_context where it belongs

- Cleanups for C89 struct initializers and useless function
arguments"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/floppy: Use designated initializers
x86/mpx: Move bd_addr to mm_context_t
x86/mm: Drop unused argument 'removed' from sync_global_pgds()
ACPI/NUMA: Do not map pxm to node when NUMA is turned off
x86/acpi: Use proper macro for invalid node
x86/smpboot: Prevent false positive out of bounds cpumask access warning
x86/boot/64: Push correct start_cpu() return address
x86/boot/64: Use 'push' instead of 'call' in start_cpu()
x86/smpboot: Make logical package management more robust

+77 -120
+2
arch/arm64/include/asm/numa.h
··· 15 15 16 16 extern nodemask_t numa_nodes_parsed __initdata; 17 17 18 + extern bool numa_off; 19 + 18 20 /* Mappings between node number and cpus on that node. */ 19 21 extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 20 22 void numa_clear_node(unsigned int cpu);
+1 -1
arch/arm64/mm/numa.c
··· 35 35 36 36 static int numa_distance_cnt; 37 37 static u8 *numa_distance; 38 - static bool numa_off; 38 + bool numa_off; 39 39 40 40 static __init int numa_parse_early_param(char *opt) 41 41 {
+2
arch/ia64/include/asm/numa.h
··· 65 65 66 66 #define local_nodeid (cpu_to_node_map[smp_processor_id()]) 67 67 68 + #define numa_off 0 69 + 68 70 extern void map_cpu_to_node(int cpu, int nid); 69 71 extern void unmap_cpu_from_node(int cpu, int nid); 70 72 extern void numa_clear_node(int cpu);
+10 -10
arch/x86/include/asm/floppy.h
··· 229 229 int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); 230 230 } fd_routine[] = { 231 231 { 232 - request_dma, 233 - free_dma, 234 - get_dma_residue, 235 - dma_mem_alloc, 236 - hard_dma_setup 232 + ._request_dma = request_dma, 233 + ._free_dma = free_dma, 234 + ._get_dma_residue = get_dma_residue, 235 + ._dma_mem_alloc = dma_mem_alloc, 236 + ._dma_setup = hard_dma_setup 237 237 }, 238 238 { 239 - vdma_request_dma, 240 - vdma_nop, 241 - vdma_get_dma_residue, 242 - vdma_mem_alloc, 243 - vdma_dma_setup 239 + ._request_dma = vdma_request_dma, 240 + ._free_dma = vdma_nop, 241 + ._get_dma_residue = vdma_get_dma_residue, 242 + ._dma_mem_alloc = vdma_mem_alloc, 243 + ._dma_setup = vdma_dma_setup 244 244 } 245 245 }; 246 246
+4
arch/x86/include/asm/mmu.h
··· 31 31 u16 pkey_allocation_map; 32 32 s16 execute_only_pkey; 33 33 #endif 34 + #ifdef CONFIG_X86_INTEL_MPX 35 + /* address of the bounds directory */ 36 + void __user *bd_addr; 37 + #endif 34 38 } mm_context_t; 35 39 36 40 #ifdef CONFIG_SMP
+2 -2
arch/x86/include/asm/mpx.h
··· 59 59 int mpx_handle_bd_fault(void); 60 60 static inline int kernel_managing_mpx_tables(struct mm_struct *mm) 61 61 { 62 - return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); 62 + return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); 63 63 } 64 64 static inline void mpx_mm_init(struct mm_struct *mm) 65 65 { ··· 67 67 * NULL is theoretically a valid place to put the bounds 68 68 * directory, so point this at an invalid address. 69 69 */ 70 - mm->bd_addr = MPX_INVALID_BOUNDS_DIR; 70 + mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; 71 71 } 72 72 void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, 73 73 unsigned long start, unsigned long end);
+1 -2
arch/x86/include/asm/pgtable_64.h
··· 116 116 native_set_pgd(pgd, native_make_pgd(0)); 117 117 } 118 118 119 - extern void sync_global_pgds(unsigned long start, unsigned long end, 120 - int removed); 119 + extern void sync_global_pgds(unsigned long start, unsigned long end); 121 120 122 121 /* 123 122 * Conversion functions: convert a page and protection to a page entry,
+1 -1
arch/x86/kernel/acpi/boot.c
··· 715 715 int nid; 716 716 717 717 nid = acpi_get_node(handle); 718 - if (nid != -1) { 718 + if (nid != NUMA_NO_NODE) { 719 719 set_apicid_to_node(physid, nid); 720 720 numa_set_node(cpu, nid); 721 721 }
-15
arch/x86/kernel/apic/apic.c
··· 2160 2160 } 2161 2161 2162 2162 /* 2163 - * This can happen on physical hotplug. The sanity check at boot time 2164 - * is done from native_smp_prepare_cpus() after num_possible_cpus() is 2165 - * established. 2166 - */ 2167 - if (topology_update_package_map(apicid, cpu) < 0) { 2168 - int thiscpu = max + disabled_cpus; 2169 - 2170 - pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", 2171 - thiscpu, apicid); 2172 - 2173 - disabled_cpus++; 2174 - return -ENOSPC; 2175 - } 2176 - 2177 - /* 2178 2163 * Validate version 2179 2164 */ 2180 2165 if (version == 0x0) {
+8 -16
arch/x86/kernel/cpu/common.c
··· 979 979 } 980 980 981 981 /* 982 - * The physical to logical package id mapping is initialized from the 983 - * acpi/mptables information. Make sure that CPUID actually agrees with 984 - * that. 982 + * Validate that ACPI/mptables have the same information about the 983 + * effective APIC id and update the package map. 985 984 */ 986 - static void sanitize_package_id(struct cpuinfo_x86 *c) 985 + static void validate_apic_and_package_id(struct cpuinfo_x86 *c) 987 986 { 988 987 #ifdef CONFIG_SMP 989 - unsigned int pkg, apicid, cpu = smp_processor_id(); 988 + unsigned int apicid, cpu = smp_processor_id(); 990 989 991 990 apicid = apic->cpu_present_to_apicid(cpu); 992 - pkg = apicid >> boot_cpu_data.x86_coreid_bits; 993 991 994 - if (apicid != c->initial_apicid) { 995 - pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", 992 + if (apicid != c->apicid) { 993 + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", 996 994 cpu, apicid, c->initial_apicid); 997 - c->initial_apicid = apicid; 998 995 } 999 - if (pkg != c->phys_proc_id) { 1000 - pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", 1001 - cpu, pkg, c->phys_proc_id); 1002 - c->phys_proc_id = pkg; 1003 - } 1004 - c->logical_proc_id = topology_phys_to_logical_pkg(pkg); 996 + BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); 1005 997 #else 1006 998 c->logical_proc_id = 0; 1007 999 #endif ··· 1124 1132 #ifdef CONFIG_NUMA 1125 1133 numa_add_cpu(smp_processor_id()); 1126 1134 #endif 1127 - sanitize_package_id(c); 1128 1135 } 1129 1136 1130 1137 /* ··· 1178 1187 enable_sep_cpu(); 1179 1188 #endif 1180 1189 mtrr_ap_init(); 1190 + validate_apic_and_package_id(c); 1181 1191 } 1182 1192 1183 1193 static __init int setup_noclflush(char *arg)
+3 -2
arch/x86/kernel/head_64.S
··· 298 298 * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, 299 299 * address given in m16:64. 300 300 */ 301 - call 1f # put return address on stack for unwinder 302 - 1: xorq %rbp, %rbp # clear frame pointer 301 + pushq $.Lafter_lret # put return address on stack for unwinder 302 + xorq %rbp, %rbp # clear frame pointer 303 303 movq initial_code(%rip), %rax 304 304 pushq $__KERNEL_CS # set correct cs 305 305 pushq %rax # target address in negative space 306 306 lretq 307 + .Lafter_lret: 307 308 ENDPROC(start_cpu) 308 309 309 310 #include "verify_cpu.S"
+23 -36
arch/x86/kernel/smpboot.c
··· 103 103 unsigned int __max_logical_packages __read_mostly; 104 104 EXPORT_SYMBOL(__max_logical_packages); 105 105 static unsigned int logical_packages __read_mostly; 106 - static bool logical_packages_frozen __read_mostly; 107 106 108 107 /* Maximum number of SMT threads on any online core */ 109 108 int __max_smt_threads __read_mostly; ··· 272 273 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 273 274 } 274 275 275 - int topology_update_package_map(unsigned int apicid, unsigned int cpu) 276 + /** 277 + * topology_update_package_map - Update the physical to logical package map 278 + * @pkg: The physical package id as retrieved via CPUID 279 + * @cpu: The cpu for which this is updated 280 + */ 281 + int topology_update_package_map(unsigned int pkg, unsigned int cpu) 276 282 { 277 - unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; 283 + unsigned int new; 278 284 279 285 /* Called from early boot ? */ 280 286 if (!physical_package_map) ··· 292 288 if (test_and_set_bit(pkg, physical_package_map)) 293 289 goto found; 294 290 295 - if (logical_packages_frozen) { 296 - physical_to_logical_pkg[pkg] = -1; 297 - pr_warn("APIC(%x) Package %u exceeds logical package max\n", 298 - apicid, pkg); 291 + if (logical_packages >= __max_logical_packages) { 292 + pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", 293 + logical_packages, cpu, __max_logical_packages); 299 294 return -ENOSPC; 300 295 } 301 296 302 297 new = logical_packages++; 303 - pr_info("APIC(%x) Converting physical %u to logical package %u\n", 304 - apicid, pkg, new); 298 + if (new != pkg) { 299 + pr_info("CPU %u Converting physical %u to logical package %u\n", 300 + cpu, pkg, new); 301 + } 305 302 physical_to_logical_pkg[pkg] = new; 306 303 307 304 found: ··· 323 318 } 324 319 EXPORT_SYMBOL(topology_phys_to_logical_pkg); 325 320 326 - static void __init smp_init_package_map(void) 321 + static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) 327 322 { 328 - unsigned 
int ncpus, cpu; 323 + unsigned int ncpus; 329 324 size_t size; 330 325 331 326 /* ··· 370 365 size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); 371 366 physical_package_map = kzalloc(size, GFP_KERNEL); 372 367 373 - for_each_present_cpu(cpu) { 374 - unsigned int apicid = apic->cpu_present_to_apicid(cpu); 375 - 376 - if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) 377 - continue; 378 - if (!topology_update_package_map(apicid, cpu)) 379 - continue; 380 - pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); 381 - per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; 382 - set_cpu_possible(cpu, false); 383 - set_cpu_present(cpu, false); 384 - } 385 - 386 - if (logical_packages > __max_logical_packages) { 387 - pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", 388 - logical_packages, __max_logical_packages); 389 - logical_packages_frozen = true; 390 - __max_logical_packages = logical_packages; 391 - } 392 - 393 368 pr_info("Max logical packages: %u\n", __max_logical_packages); 369 + 370 + topology_update_package_map(c->phys_proc_id, cpu); 394 371 } 395 372 396 373 void __init smp_store_boot_cpu_info(void) ··· 382 395 383 396 *c = boot_cpu_data; 384 397 c->cpu_index = id; 385 - smp_init_package_map(); 398 + smp_init_package_map(c, id); 386 399 } 387 400 388 401 /* ··· 1463 1476 possible = i; 1464 1477 } 1465 1478 1479 + nr_cpu_ids = possible; 1480 + 1466 1481 pr_info("Allowing %d CPUs, %d hotplug CPUs\n", 1467 1482 possible, max_t(int, possible - num_processors, 0)); 1468 1483 1484 + reset_cpu_possible_mask(); 1485 + 1469 1486 for (i = 0; i < possible; i++) 1470 1487 set_cpu_possible(i, true); 1471 - for (; i < NR_CPUS; i++) 1472 - set_cpu_possible(i, false); 1473 - 1474 - nr_cpu_ids = possible; 1475 1488 } 1476 1489 1477 1490 #ifdef CONFIG_HOTPLUG_CPU
+1 -1
arch/x86/mm/fault.c
··· 413 413 414 414 void vmalloc_sync_all(void) 415 415 { 416 - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); 416 + sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); 417 417 } 418 418 419 419 /*
+7 -17
arch/x86/mm/init_64.c
··· 89 89 __setup("noexec32=", nonx32_setup); 90 90 91 91 /* 92 - * When memory was added/removed make sure all the processes MM have 92 + * When memory was added make sure all the processes MM have 93 93 * suitable PGD entries in the local PGD level page. 94 94 */ 95 - void sync_global_pgds(unsigned long start, unsigned long end, int removed) 95 + void sync_global_pgds(unsigned long start, unsigned long end) 96 96 { 97 97 unsigned long address; 98 98 ··· 100 100 const pgd_t *pgd_ref = pgd_offset_k(address); 101 101 struct page *page; 102 102 103 - /* 104 - * When it is called after memory hot remove, pgd_none() 105 - * returns true. In this case (removed == 1), we must clear 106 - * the PGD entries in the local PGD level page. 107 - */ 108 - if (pgd_none(*pgd_ref) && !removed) 103 + if (pgd_none(*pgd_ref)) 109 104 continue; 110 105 111 106 spin_lock(&pgd_lock); ··· 117 122 BUG_ON(pgd_page_vaddr(*pgd) 118 123 != pgd_page_vaddr(*pgd_ref)); 119 124 120 - if (removed) { 121 - if (pgd_none(*pgd_ref) && !pgd_none(*pgd)) 122 - pgd_clear(pgd); 123 - } else { 124 - if (pgd_none(*pgd)) 125 - set_pgd(pgd, *pgd_ref); 126 - } 125 + if (pgd_none(*pgd)) 126 + set_pgd(pgd, *pgd_ref); 127 127 128 128 spin_unlock(pgt_lock); 129 129 } ··· 586 596 } 587 597 588 598 if (pgd_changed) 589 - sync_global_pgds(vaddr_start, vaddr_end - 1, 0); 599 + sync_global_pgds(vaddr_start, vaddr_end - 1); 590 600 591 601 __flush_tlb_all(); 592 602 ··· 1229 1239 } else 1230 1240 err = vmemmap_populate_basepages(start, end, node); 1231 1241 if (!err) 1232 - sync_global_pgds(start, end - 1, 0); 1242 + sync_global_pgds(start, end - 1); 1233 1243 return err; 1234 1244 } 1235 1245
+5 -5
arch/x86/mm/mpx.c
··· 350 350 * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is 351 351 * expected to be relatively expensive. Storing the bounds 352 352 * directory here means that we do not have to do xsave in the 353 - * unmap path; we can just use mm->bd_addr instead. 353 + * unmap path; we can just use mm->context.bd_addr instead. 354 354 */ 355 355 bd_base = mpx_get_bounds_dir(); 356 356 down_write(&mm->mmap_sem); 357 - mm->bd_addr = bd_base; 358 - if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) 357 + mm->context.bd_addr = bd_base; 358 + if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR) 359 359 ret = -ENXIO; 360 360 361 361 up_write(&mm->mmap_sem); ··· 370 370 return -ENXIO; 371 371 372 372 down_write(&mm->mmap_sem); 373 - mm->bd_addr = MPX_INVALID_BOUNDS_DIR; 373 + mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; 374 374 up_write(&mm->mmap_sem); 375 375 return 0; 376 376 } ··· 947 947 end = bta_end_vaddr; 948 948 } 949 949 950 - bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); 950 + bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start); 951 951 ret = get_bt_addr(mm, bde_vaddr, &bt_addr); 952 952 /* 953 953 * No bounds table there, so nothing to unmap.
+1 -1
arch/x86/mm/numa.c
··· 19 19 20 20 #include "numa_internal.h" 21 21 22 - int __initdata numa_off; 22 + int numa_off; 23 23 nodemask_t numa_nodes_parsed __initdata; 24 24 25 25 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-6
arch/x86/xen/smp.c
··· 87 87 cpu_data(cpu).x86_max_cores = 1; 88 88 set_cpu_sibling_map(cpu); 89 89 90 - /* 91 - * identify_cpu() may have set logical_pkg_id to -1 due 92 - * to incorrect phys_proc_id. Let's re-comupte it. 93 - */ 94 - topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu); 95 - 96 90 xen_setup_cpu_clockevents(); 97 91 98 92 notify_cpu_starting(cpu);
+1 -1
drivers/acpi/numa.c
··· 70 70 { 71 71 int node; 72 72 73 - if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) 73 + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) 74 74 return NUMA_NO_NODE; 75 75 76 76 node = pxm_to_node_map[pxm];
+5
include/linux/cpumask.h
··· 722 722 void init_cpu_possible(const struct cpumask *src); 723 723 void init_cpu_online(const struct cpumask *src); 724 724 725 + static inline void reset_cpu_possible_mask(void) 726 + { 727 + bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); 728 + } 729 + 725 730 static inline void 726 731 set_cpu_possible(unsigned int cpu, bool possible) 727 732 {
-4
include/linux/mm_types.h
··· 509 509 bool tlb_flush_pending; 510 510 #endif 511 511 struct uprobes_state uprobes_state; 512 - #ifdef CONFIG_X86_INTEL_MPX 513 - /* address of the bounds directory */ 514 - void __user *bd_addr; 515 - #endif 516 512 #ifdef CONFIG_HUGETLB_PAGE 517 513 atomic_long_t hugetlb_usage; 518 514 #endif