Merge x86-64 update from Andi · tjh.dev/kernel@4060994

+10 -2

Documentation/x86_64/boot-options.txt

··· 7 7 8 8 mce=off disable machine check 9 9 mce=bootlog Enable logging of machine checks left over from booting. 10 - Disabled by default because some BIOS leave bogus ones. 10 + Disabled by default on AMD because some BIOS leave bogus ones. 11 11 If your BIOS doesn't do that it's a good idea to enable though 12 12 to make sure you log even machine check events that result 13 - in a reboot. 13 + in a reboot. On Intel systems it is enabled by default. 14 + mce=nobootlog 15 + Disable boot machine check logging. 14 16 mce=tolerancelevel (number) 15 17 0: always panic, 1: panic if deadlock possible, 16 18 2: try to avoid panic, 3: never panic or exit (for testing) ··· 124 122 125 123 cpumask=MASK only use cpus with bits set in mask 126 124 125 + additional_cpus=NUM Allow NUM more CPUs for hotplug 126 + (defaults are specified by the BIOS or half the available CPUs) 127 + 127 128 NUMA 128 129 129 130 numa=off Only set up a single NUMA node spanning all memory. ··· 192 187 Useful together with panic=30 to trigger a reboot. 193 188 194 189 kstack=N Print that many words from the kernel stack in oops dumps. 190 + 191 + pagefaulttrace Dump all page faults. Only useful for extreme debugging 192 + and will create a lot of output. 195 193 196 194 Misc 197 195

+5 -1

Documentation/x86_64/mm.txt

··· 6 6 0000000000000000 - 00007fffffffffff (=47bits) user space, different per mm 7 7 hole caused by [48:63] sign extension 8 8 ffff800000000000 - ffff80ffffffffff (=40bits) guard hole 9 - ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of phys. memory 9 + ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory 10 10 ffffc10000000000 - ffffc1ffffffffff (=40bits) hole 11 11 ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space 12 12 ... unused hole ... 13 13 ffffffff80000000 - ffffffff82800000 (=40MB) kernel text mapping, from phys 0 14 14 ... unused hole ... 15 15 ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space 16 + 17 + The direct mapping covers all memory in the system up to the highest 18 + memory address (this means in some cases it can also include PCI memory 19 + holes) 16 20 17 21 vmalloc space is lazily synchronized into the different PML4 pages of 18 22 the processes using the page fault handler, with init_level4_pgt as

+8 -9

arch/i386/kernel/acpi/boot.c

··· 39 39 40 40 #ifdef CONFIG_X86_64 41 41 42 - static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) 43 - { 44 - } 45 42 extern void __init clustered_apic_check(void); 46 - static inline int ioapic_setup_disabled(void) 47 - { 48 - return 0; 49 - } 50 43 44 + extern int gsi_irq_sharing(int gsi); 51 45 #include <asm/proto.h> 46 + 47 + static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } 48 + 52 49 53 50 #else /* X86 */ 54 51 ··· 53 56 #include <mach_apic.h> 54 57 #include <mach_mpparse.h> 55 58 #endif /* CONFIG_X86_LOCAL_APIC */ 59 + 60 + static inline int gsi_irq_sharing(int gsi) { return gsi; } 56 61 57 62 #endif /* X86 */ 58 63 ··· 458 459 *irq = IO_APIC_VECTOR(gsi); 459 460 else 460 461 #endif 461 - *irq = gsi; 462 + *irq = gsi_irq_sharing(gsi); 462 463 return 0; 463 464 } 464 465 ··· 542 543 * RSDP signature. 543 544 */ 544 545 for (offset = 0; offset < length; offset += 16) { 545 - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) 546 + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) 546 547 continue; 547 548 return (start + offset); 548 549 }

+6 -6

arch/i386/kernel/cpu/amd.c

··· 206 206 display_cacheinfo(c); 207 207 208 208 if (cpuid_eax(0x80000000) >= 0x80000008) { 209 - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 210 - if (c->x86_num_cores & (c->x86_num_cores - 1)) 211 - c->x86_num_cores = 1; 209 + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 210 + if (c->x86_max_cores & (c->x86_max_cores - 1)) 211 + c->x86_max_cores = 1; 212 212 } 213 213 214 214 #ifdef CONFIG_X86_HT ··· 217 217 * distingush the cores. Assumes number of cores is a power 218 218 * of two. 219 219 */ 220 - if (c->x86_num_cores > 1) { 220 + if (c->x86_max_cores > 1) { 221 221 int cpu = smp_processor_id(); 222 222 unsigned bits = 0; 223 - while ((1 << bits) < c->x86_num_cores) 223 + while ((1 << bits) < c->x86_max_cores) 224 224 bits++; 225 225 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); 226 226 phys_proc_id[cpu] >>= bits; 227 227 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 228 - cpu, c->x86_num_cores, cpu_core_id[cpu]); 228 + cpu, c->x86_max_cores, cpu_core_id[cpu]); 229 229 } 230 230 #endif 231 231 }

+16 -24

arch/i386/kernel/cpu/common.c

··· 231 231 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 232 232 c->x86 = (tfms >> 8) & 15; 233 233 c->x86_model = (tfms >> 4) & 15; 234 - if (c->x86 == 0xf) { 234 + if (c->x86 == 0xf) 235 235 c->x86 += (tfms >> 20) & 0xff; 236 + if (c->x86 >= 0x6) 236 237 c->x86_model += ((tfms >> 16) & 0xF) << 4; 237 - } 238 238 c->x86_mask = tfms & 15; 239 239 if (cap0 & (1<<19)) 240 240 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; ··· 333 333 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 334 334 c->x86_vendor_id[0] = '\0'; /* Unset */ 335 335 c->x86_model_id[0] = '\0'; /* Unset */ 336 - c->x86_num_cores = 1; 336 + c->x86_max_cores = 1; 337 337 memset(&c->x86_capability, 0, sizeof c->x86_capability); 338 338 339 339 if (!have_cpuid_p()) { ··· 443 443 void __devinit detect_ht(struct cpuinfo_x86 *c) 444 444 { 445 445 u32 eax, ebx, ecx, edx; 446 - int index_msb, tmp; 446 + int index_msb, core_bits; 447 447 int cpu = smp_processor_id(); 448 + 449 + cpuid(1, &eax, &ebx, &ecx, &edx); 450 + 451 + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); 448 452 449 453 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 450 454 return; 451 455 452 - cpuid(1, &eax, &ebx, &ecx, &edx); 453 456 smp_num_siblings = (ebx & 0xff0000) >> 16; 454 457 455 458 if (smp_num_siblings == 1) { 456 459 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 457 460 } else if (smp_num_siblings > 1 ) { 458 - index_msb = 31; 459 461 460 462 if (smp_num_siblings > NR_CPUS) { 461 463 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 462 464 smp_num_siblings = 1; 463 465 return; 464 466 } 465 - tmp = smp_num_siblings; 466 - while ((tmp & 0x80000000 ) == 0) { 467 - tmp <<=1 ; 468 - index_msb--; 469 - } 470 - if (smp_num_siblings & (smp_num_siblings - 1)) 471 - index_msb++; 467 + 468 + index_msb = get_count_order(smp_num_siblings); 472 469 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 473 470 474 471 printk(KERN_INFO "CPU: Physical 
Processor ID: %d\n", 475 472 phys_proc_id[cpu]); 476 473 477 - smp_num_siblings = smp_num_siblings / c->x86_num_cores; 474 + smp_num_siblings = smp_num_siblings / c->x86_max_cores; 478 475 479 - tmp = smp_num_siblings; 480 - index_msb = 31; 481 - while ((tmp & 0x80000000) == 0) { 482 - tmp <<=1 ; 483 - index_msb--; 484 - } 476 + index_msb = get_count_order(smp_num_siblings) ; 485 477 486 - if (smp_num_siblings & (smp_num_siblings - 1)) 487 - index_msb++; 478 + core_bits = get_count_order(c->x86_max_cores); 488 479 489 - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 480 + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & 481 + ((1 << core_bits) - 1); 490 482 491 - if (c->x86_num_cores > 1) 483 + if (c->x86_max_cores > 1) 492 484 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 493 485 cpu_core_id[cpu]); 494 486 }

+1 -1

arch/i386/kernel/cpu/intel.c

··· 158 158 if ( p ) 159 159 strcpy(c->x86_model_id, p); 160 160 161 - c->x86_num_cores = num_cpu_cores(c); 161 + c->x86_max_cores = num_cpu_cores(c); 162 162 163 163 detect_ht(c); 164 164

+32 -14

arch/i386/kernel/cpu/intel_cacheinfo.c

··· 293 293 #ifdef CONFIG_SMP 294 294 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 295 295 { 296 - struct _cpuid4_info *this_leaf; 296 + struct _cpuid4_info *this_leaf, *sibling_leaf; 297 297 unsigned long num_threads_sharing; 298 - #ifdef CONFIG_X86_HT 299 - struct cpuinfo_x86 *c = cpu_data + cpu; 300 - #endif 298 + int index_msb, i; 299 + struct cpuinfo_x86 *c = cpu_data; 301 300 302 301 this_leaf = CPUID4_INFO_IDX(cpu, index); 303 302 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 304 303 305 304 if (num_threads_sharing == 1) 306 305 cpu_set(cpu, this_leaf->shared_cpu_map); 307 - #ifdef CONFIG_X86_HT 308 - else if (num_threads_sharing == smp_num_siblings) 309 - this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; 310 - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) 311 - this_leaf->shared_cpu_map = cpu_core_map[cpu]; 312 - else 313 - printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " 314 - "any known set of CPUs\n"); 315 - #endif 306 + else { 307 + index_msb = get_count_order(num_threads_sharing); 308 + 309 + for_each_online_cpu(i) { 310 + if (c[i].apicid >> index_msb == 311 + c[cpu].apicid >> index_msb) { 312 + cpu_set(i, this_leaf->shared_cpu_map); 313 + if (i != cpu && cpuid4_info[i]) { 314 + sibling_leaf = CPUID4_INFO_IDX(i, index); 315 + cpu_set(cpu, sibling_leaf->shared_cpu_map); 316 + } 317 + } 318 + } 319 + } 320 + } 321 + static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) 322 + { 323 + struct _cpuid4_info *this_leaf, *sibling_leaf; 324 + int sibling; 325 + 326 + this_leaf = CPUID4_INFO_IDX(cpu, index); 327 + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { 328 + sibling_leaf = CPUID4_INFO_IDX(sibling, index); 329 + cpu_clear(cpu, sibling_leaf->shared_cpu_map); 330 + } 316 331 } 317 332 #else 318 333 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} 334 + static void __init 
cache_remove_shared_cpu_map(unsigned int cpu, int index) {} 319 335 #endif 320 336 321 337 static void free_cache_attributes(unsigned int cpu) ··· 590 574 unsigned int cpu = sys_dev->id; 591 575 unsigned long i; 592 576 593 - for (i = 0; i < num_cache_leaves; i++) 577 + for (i = 0; i < num_cache_leaves; i++) { 578 + cache_remove_shared_cpu_map(cpu, i); 594 579 kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 580 + } 595 581 kobject_unregister(cache_kobject[cpu]); 596 582 cpuid4_cache_sysfs_exit(cpu); 597 583 return;

+8

arch/i386/kernel/cpu/mtrr/main.c

··· 626 626 if (cpuid_eax(0x80000000) >= 0x80000008) { 627 627 u32 phys_addr; 628 628 phys_addr = cpuid_eax(0x80000008) & 0xff; 629 + /* CPUID workaround for Intel 0F33/0F34 CPU */ 630 + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 631 + boot_cpu_data.x86 == 0xF && 632 + boot_cpu_data.x86_model == 0x3 && 633 + (boot_cpu_data.x86_mask == 0x3 || 634 + boot_cpu_data.x86_mask == 0x4)) 635 + phys_addr = 36; 636 + 629 637 size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); 630 638 size_and_mask = ~size_or_mask & 0xfff00000; 631 639 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&

+3 -4

arch/i386/kernel/cpu/proc.c

··· 94 94 if (c->x86_cache_size >= 0) 95 95 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 96 96 #ifdef CONFIG_X86_HT 97 - if (c->x86_num_cores * smp_num_siblings > 1) { 97 + if (c->x86_max_cores * smp_num_siblings > 1) { 98 98 seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); 99 - seq_printf(m, "siblings\t: %d\n", 100 - c->x86_num_cores * smp_num_siblings); 99 + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); 101 100 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); 102 - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 101 + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 103 102 } 104 103 #endif 105 104

+53 -20

arch/i386/kernel/smpboot.c

··· 72 72 /* Core ID of each logical CPU */ 73 73 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; 74 74 75 + /* representing HT siblings of each logical CPU */ 75 76 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 76 77 EXPORT_SYMBOL(cpu_sibling_map); 77 78 79 + /* representing HT and core siblings of each logical CPU */ 78 80 cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 79 81 EXPORT_SYMBOL(cpu_core_map); 80 82 ··· 444 442 445 443 static int cpucount; 446 444 445 + /* representing cpus for which sibling maps can be computed */ 446 + static cpumask_t cpu_sibling_setup_map; 447 + 447 448 static inline void 448 449 set_cpu_sibling_map(int cpu) 449 450 { 450 451 int i; 452 + struct cpuinfo_x86 *c = cpu_data; 453 + 454 + cpu_set(cpu, cpu_sibling_setup_map); 451 455 452 456 if (smp_num_siblings > 1) { 453 - for (i = 0; i < NR_CPUS; i++) { 454 - if (!cpu_isset(i, cpu_callout_map)) 455 - continue; 456 - if (cpu_core_id[cpu] == cpu_core_id[i]) { 457 + for_each_cpu_mask(i, cpu_sibling_setup_map) { 458 + if (phys_proc_id[cpu] == phys_proc_id[i] && 459 + cpu_core_id[cpu] == cpu_core_id[i]) { 457 460 cpu_set(i, cpu_sibling_map[cpu]); 458 461 cpu_set(cpu, cpu_sibling_map[i]); 462 + cpu_set(i, cpu_core_map[cpu]); 463 + cpu_set(cpu, cpu_core_map[i]); 459 464 } 460 465 } 461 466 } else { 462 467 cpu_set(cpu, cpu_sibling_map[cpu]); 463 468 } 464 469 465 - if (current_cpu_data.x86_num_cores > 1) { 466 - for (i = 0; i < NR_CPUS; i++) { 467 - if (!cpu_isset(i, cpu_callout_map)) 468 - continue; 469 - if (phys_proc_id[cpu] == phys_proc_id[i]) { 470 - cpu_set(i, cpu_core_map[cpu]); 471 - cpu_set(cpu, cpu_core_map[i]); 472 - } 473 - } 474 - } else { 470 + if (current_cpu_data.x86_max_cores == 1) { 475 471 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 472 + c[cpu].booted_cores = 1; 473 + return; 474 + } 475 + 476 + for_each_cpu_mask(i, cpu_sibling_setup_map) { 477 + if (phys_proc_id[cpu] == phys_proc_id[i]) { 478 + cpu_set(i, cpu_core_map[cpu]); 479 + cpu_set(cpu, 
cpu_core_map[i]); 480 + /* 481 + * Does this new cpu bringup a new core? 482 + */ 483 + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { 484 + /* 485 + * for each core in package, increment 486 + * the booted_cores for this new cpu 487 + */ 488 + if (first_cpu(cpu_sibling_map[i]) == i) 489 + c[cpu].booted_cores++; 490 + /* 491 + * increment the core count for all 492 + * the other cpus in this package 493 + */ 494 + if (i != cpu) 495 + c[i].booted_cores++; 496 + } else if (i != cpu && !c[cpu].booted_cores) 497 + c[cpu].booted_cores = c[i].booted_cores; 498 + } 476 499 } 477 500 } 478 501 ··· 1122 1095 1123 1096 current_thread_info()->cpu = 0; 1124 1097 smp_tune_scheduling(); 1125 - cpus_clear(cpu_sibling_map[0]); 1126 - cpu_set(0, cpu_sibling_map[0]); 1127 1098 1128 - cpus_clear(cpu_core_map[0]); 1129 - cpu_set(0, cpu_core_map[0]); 1099 + set_cpu_sibling_map(0); 1130 1100 1131 1101 /* 1132 1102 * If we couldn't find an SMP configuration at boot time, ··· 1302 1278 remove_siblinginfo(int cpu) 1303 1279 { 1304 1280 int sibling; 1281 + struct cpuinfo_x86 *c = cpu_data; 1305 1282 1283 + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { 1284 + cpu_clear(cpu, cpu_core_map[sibling]); 1285 + /* 1286 + * last thread sibling in this cpu core going down 1287 + */ 1288 + if (cpus_weight(cpu_sibling_map[cpu]) == 1) 1289 + c[sibling].booted_cores--; 1290 + } 1291 + 1306 1292 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 1307 1293 cpu_clear(cpu, cpu_sibling_map[sibling]); 1308 - for_each_cpu_mask(sibling, cpu_core_map[cpu]) 1309 - cpu_clear(cpu, cpu_core_map[sibling]); 1310 1294 cpus_clear(cpu_sibling_map[cpu]); 1311 1295 cpus_clear(cpu_core_map[cpu]); 1312 1296 phys_proc_id[cpu] = BAD_APICID; 1313 1297 cpu_core_id[cpu] = BAD_APICID; 1298 + cpu_clear(cpu, cpu_sibling_setup_map); 1314 1299 } 1315 1300 1316 1301 int __cpu_disable(void)

+2 -2

arch/i386/kernel/srat.c

··· 137 137 "enabled and removable" : "enabled" ) ); 138 138 } 139 139 140 - #if MAX_NR_ZONES != 3 141 - #error "MAX_NR_ZONES != 3, chunk_to_zone requires review" 140 + #if MAX_NR_ZONES != 4 141 + #error "MAX_NR_ZONES != 4, chunk_to_zone requires review" 142 142 #endif 143 143 /* Take a chunk of pages from page frame cstart to cend and count the number 144 144 * of pages in each zone, returned via zones[].

+4

arch/ia64/Kconfig

··· 58 58 bool 59 59 select GENERIC_ALLOCATOR 60 60 61 + config ZONE_DMA_IS_DMA32 62 + bool 63 + default y 64 + 61 65 choice 62 66 prompt "System type" 63 67 default IA64_GENERIC

+41 -16

arch/x86_64/Kconfig

··· 226 226 227 227 source "kernel/Kconfig.preempt" 228 228 229 - config K8_NUMA 230 - bool "K8 NUMA support" 231 - select NUMA 229 + config NUMA 230 + bool "Non Uniform Memory Access (NUMA) Support" 232 231 depends on SMP 233 232 help 234 - Enable NUMA (Non Unified Memory Architecture) support for 235 - AMD Opteron Multiprocessor systems. The kernel will try to allocate 236 - memory used by a CPU on the local memory controller of the CPU 237 - and add some more NUMA awareness to the kernel. 238 - This code is recommended on all multiprocessor Opteron systems 239 - and normally doesn't hurt on others. 233 + Enable NUMA (Non Uniform Memory Access) support. The kernel 234 + will try to allocate memory used by a CPU on the local memory 235 + controller of the CPU and add some more NUMA awareness to the kernel. 236 + This code is recommended on all multiprocessor Opteron systems. 237 + If the system is EM64T, you should say N unless your system is EM64T 238 + NUMA. 239 + 240 + config K8_NUMA 241 + bool "Old style AMD Opteron NUMA detection" 242 + depends on NUMA 243 + default y 244 + help 245 + Enable K8 NUMA node topology detection. You should say Y here if 246 + you have a multi processor AMD K8 system. This uses an old 247 + method to read the NUMA configuration directly from the builtin 248 + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA 249 + instead, which also takes priority if both are compiled in. 250 + 251 + # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. 252 + 253 + config X86_64_ACPI_NUMA 254 + bool "ACPI NUMA detection" 255 + depends on NUMA 256 + select ACPI 257 + select ACPI_NUMA 258 + default y 259 + help 260 + Enable ACPI SRAT based node topology detection. 240 261 241 262 config NUMA_EMU 242 - bool "NUMA emulation support" 243 - select NUMA 244 - depends on SMP 263 + bool "NUMA emulation" 264 + depends on NUMA 245 265 help 246 266 Enable NUMA emulation. 
A flat machine will be split 247 267 into virtual nodes when booted with "numa=fake=N", where N is the ··· 272 252 depends on NUMA 273 253 default y 274 254 275 - config NUMA 276 - bool 277 - default n 278 255 279 256 config ARCH_DISCONTIGMEM_ENABLE 280 257 def_bool y ··· 390 373 help 391 374 Additional support for intel specific MCE features such as 392 375 the thermal monitor. 376 + 377 + config X86_MCE_AMD 378 + bool "AMD MCE features" 379 + depends on X86_MCE && X86_LOCAL_APIC 380 + default y 381 + help 382 + Additional support for AMD specific MCE features such as 383 + the DRAM Error Threshold. 393 384 394 385 config PHYSICAL_START 395 386 hex "Physical address where the kernel is loaded" if EMBEDDED ··· 527 502 left. 528 503 529 504 config IA32_AOUT 530 - bool "IA32 a.out support" 505 + tristate "IA32 a.out support" 531 506 depends on IA32_EMULATION 532 507 help 533 508 Support old a.out binaries in the 32bit emulation.

-9

arch/x86_64/Kconfig.debug

··· 2 2 3 3 source "lib/Kconfig.debug" 4 4 5 - # !SMP for now because the context switch early causes GPF in segment reloading 6 - # and the GS base checking does the wrong thing then, causing a hang. 7 - config CHECKING 8 - bool "Additional run-time checks" 9 - depends on DEBUG_KERNEL && !SMP 10 - help 11 - Enables some internal consistency checks for kernel debugging. 12 - You should normally say N. 13 - 14 5 config INIT_DEBUG 15 6 bool "Debug __init statements" 16 7 depends on DEBUG_KERNEL

+83 -15

arch/x86_64/defconfig

··· 1 1 # 2 2 # Automatically generated make config: don't edit 3 - # Linux kernel version: 2.6.13-git11 4 - # Mon Sep 12 16:16:16 2005 3 + # Linux kernel version: 2.6.14-git7 4 + # Sat Nov 5 15:55:50 2005 5 5 # 6 6 CONFIG_X86_64=y 7 7 CONFIG_64BIT=y ··· 35 35 # CONFIG_BSD_PROCESS_ACCT is not set 36 36 CONFIG_SYSCTL=y 37 37 # CONFIG_AUDIT is not set 38 - # CONFIG_HOTPLUG is not set 38 + CONFIG_HOTPLUG=y 39 39 CONFIG_KOBJECT_UEVENT=y 40 40 CONFIG_IKCONFIG=y 41 41 CONFIG_IKCONFIG_PROC=y ··· 93 93 # CONFIG_PREEMPT_VOLUNTARY is not set 94 94 # CONFIG_PREEMPT is not set 95 95 CONFIG_PREEMPT_BKL=y 96 + CONFIG_NUMA=y 96 97 CONFIG_K8_NUMA=y 98 + CONFIG_X86_64_ACPI_NUMA=y 97 99 # CONFIG_NUMA_EMU is not set 98 100 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y 99 - CONFIG_NUMA=y 100 101 CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y 101 102 CONFIG_ARCH_SPARSEMEM_ENABLE=y 102 103 CONFIG_SELECT_MEMORY_MODEL=y ··· 108 107 CONFIG_FLAT_NODE_MEM_MAP=y 109 108 CONFIG_NEED_MULTIPLE_NODES=y 110 109 # CONFIG_SPARSEMEM_STATIC is not set 110 + CONFIG_SPLIT_PTLOCK_CPUS=4 111 111 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y 112 - CONFIG_HAVE_DEC_LOCK=y 113 112 CONFIG_NR_CPUS=32 113 + CONFIG_HOTPLUG_CPU=y 114 114 CONFIG_HPET_TIMER=y 115 115 CONFIG_X86_PM_TIMER=y 116 116 CONFIG_HPET_EMULATE_RTC=y ··· 119 117 CONFIG_SWIOTLB=y 120 118 CONFIG_X86_MCE=y 121 119 CONFIG_X86_MCE_INTEL=y 120 + CONFIG_X86_MCE_AMD=y 122 121 CONFIG_PHYSICAL_START=0x100000 123 122 # CONFIG_KEXEC is not set 124 123 CONFIG_SECCOMP=y ··· 139 136 # CONFIG_PM_DEBUG is not set 140 137 CONFIG_SOFTWARE_SUSPEND=y 141 138 CONFIG_PM_STD_PARTITION="" 139 + CONFIG_SUSPEND_SMP=y 142 140 143 141 # 144 142 # ACPI (Advanced Configuration and Power Interface) Support 145 143 # 146 144 CONFIG_ACPI=y 145 + CONFIG_ACPI_SLEEP=y 146 + CONFIG_ACPI_SLEEP_PROC_FS=y 147 + CONFIG_ACPI_SLEEP_PROC_SLEEP=y 147 148 CONFIG_ACPI_AC=y 148 149 CONFIG_ACPI_BATTERY=y 149 150 CONFIG_ACPI_BUTTON=y ··· 155 148 CONFIG_ACPI_HOTKEY=m 156 149 CONFIG_ACPI_FAN=y 157 150 CONFIG_ACPI_PROCESSOR=y 151 
+ CONFIG_ACPI_HOTPLUG_CPU=y 158 152 CONFIG_ACPI_THERMAL=y 159 153 CONFIG_ACPI_NUMA=y 160 154 # CONFIG_ACPI_ASUS is not set ··· 166 158 CONFIG_ACPI_EC=y 167 159 CONFIG_ACPI_POWER=y 168 160 CONFIG_ACPI_SYSTEM=y 169 - # CONFIG_ACPI_CONTAINER is not set 161 + CONFIG_ACPI_CONTAINER=y 170 162 171 163 # 172 164 # CPU Frequency scaling ··· 301 293 # Network testing 302 294 # 303 295 # CONFIG_NET_PKTGEN is not set 304 - # CONFIG_NETFILTER_NETLINK is not set 305 296 # CONFIG_HAMRADIO is not set 306 297 # CONFIG_IRDA is not set 307 298 # CONFIG_BT is not set ··· 317 310 CONFIG_PREVENT_FIRMWARE_BUILD=y 318 311 # CONFIG_FW_LOADER is not set 319 312 # CONFIG_DEBUG_DRIVER is not set 313 + 314 + # 315 + # Connector - unified userspace <-> kernelspace linker 316 + # 317 + # CONFIG_CONNECTOR is not set 320 318 321 319 # 322 320 # Memory Technology Devices (MTD) ··· 366 354 # CONFIG_IOSCHED_AS is not set 367 355 CONFIG_IOSCHED_DEADLINE=y 368 356 CONFIG_IOSCHED_CFQ=y 357 + # CONFIG_DEFAULT_AS is not set 358 + CONFIG_DEFAULT_DEADLINE=y 359 + # CONFIG_DEFAULT_CFQ is not set 360 + # CONFIG_DEFAULT_NOOP is not set 361 + CONFIG_DEFAULT_IOSCHED="cfq" 369 362 # CONFIG_ATA_OVER_ETH is not set 370 363 371 364 # ··· 467 450 CONFIG_SCSI_SPI_ATTRS=y 468 451 # CONFIG_SCSI_FC_ATTRS is not set 469 452 # CONFIG_SCSI_ISCSI_ATTRS is not set 453 + # CONFIG_SCSI_SAS_ATTRS is not set 470 454 471 455 # 472 456 # SCSI low-level drivers ··· 487 469 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set 488 470 # CONFIG_MEGARAID_NEWGEN is not set 489 471 # CONFIG_MEGARAID_LEGACY is not set 472 + # CONFIG_MEGARAID_SAS is not set 490 473 CONFIG_SCSI_SATA=y 491 474 # CONFIG_SCSI_SATA_AHCI is not set 492 475 # CONFIG_SCSI_SATA_SVW is not set 493 476 CONFIG_SCSI_ATA_PIIX=y 494 477 # CONFIG_SCSI_SATA_MV is not set 495 - # CONFIG_SCSI_SATA_NV is not set 496 - # CONFIG_SCSI_SATA_PROMISE is not set 478 + CONFIG_SCSI_SATA_NV=y 479 + # CONFIG_SCSI_PDC_ADMA is not set 497 480 # CONFIG_SCSI_SATA_QSTOR is not set 481 + # 
CONFIG_SCSI_SATA_PROMISE is not set 498 482 # CONFIG_SCSI_SATA_SX4 is not set 499 483 # CONFIG_SCSI_SATA_SIL is not set 484 + # CONFIG_SCSI_SATA_SIL24 is not set 500 485 # CONFIG_SCSI_SATA_SIS is not set 501 486 # CONFIG_SCSI_SATA_ULI is not set 502 487 CONFIG_SCSI_SATA_VIA=y 503 488 # CONFIG_SCSI_SATA_VITESSE is not set 489 + CONFIG_SCSI_SATA_INTEL_COMBINED=y 504 490 # CONFIG_SCSI_BUSLOGIC is not set 505 491 # CONFIG_SCSI_DMX3191D is not set 506 492 # CONFIG_SCSI_EATA is not set ··· 547 525 CONFIG_FUSION=y 548 526 CONFIG_FUSION_SPI=y 549 527 # CONFIG_FUSION_FC is not set 528 + # CONFIG_FUSION_SAS is not set 550 529 CONFIG_FUSION_MAX_SGE=128 551 530 # CONFIG_FUSION_CTL is not set 552 531 ··· 587 564 CONFIG_MII=y 588 565 # CONFIG_HAPPYMEAL is not set 589 566 # CONFIG_SUNGEM is not set 567 + # CONFIG_CASSINI is not set 590 568 CONFIG_NET_VENDOR_3COM=y 591 569 CONFIG_VORTEX=y 592 570 # CONFIG_TYPHOON is not set ··· 764 740 # 765 741 # Watchdog Cards 766 742 # 767 - # CONFIG_WATCHDOG is not set 743 + CONFIG_WATCHDOG=y 744 + # CONFIG_WATCHDOG_NOWAYOUT is not set 745 + 746 + # 747 + # Watchdog Device Drivers 748 + # 749 + CONFIG_SOFT_WATCHDOG=y 750 + # CONFIG_ACQUIRE_WDT is not set 751 + # CONFIG_ADVANTECH_WDT is not set 752 + # CONFIG_ALIM1535_WDT is not set 753 + # CONFIG_ALIM7101_WDT is not set 754 + # CONFIG_SC520_WDT is not set 755 + # CONFIG_EUROTECH_WDT is not set 756 + # CONFIG_IB700_WDT is not set 757 + # CONFIG_IBMASR is not set 758 + # CONFIG_WAFER_WDT is not set 759 + # CONFIG_I6300ESB_WDT is not set 760 + # CONFIG_I8XX_TCO is not set 761 + # CONFIG_SC1200_WDT is not set 762 + # CONFIG_60XX_WDT is not set 763 + # CONFIG_SBC8360_WDT is not set 764 + # CONFIG_CPU5_WDT is not set 765 + # CONFIG_W83627HF_WDT is not set 766 + # CONFIG_W83877F_WDT is not set 767 + # CONFIG_W83977F_WDT is not set 768 + # CONFIG_MACHZ_WDT is not set 769 + 770 + # 771 + # PCI-based Watchdog Cards 772 + # 773 + # CONFIG_PCIPCWATCHDOG is not set 774 + # CONFIG_WDTPCI is not set 775 + 
776 + # 777 + # USB-based Watchdog Cards 778 + # 779 + # CONFIG_USBPCWATCHDOG is not set 768 780 CONFIG_HW_RANDOM=y 769 781 # CONFIG_NVRAM is not set 770 782 CONFIG_RTC=y ··· 827 767 # TPM devices 828 768 # 829 769 # CONFIG_TCG_TPM is not set 770 + # CONFIG_TELCLOCK is not set 830 771 831 772 # 832 773 # I2C support ··· 844 783 # 845 784 CONFIG_HWMON=y 846 785 # CONFIG_HWMON_VID is not set 786 + # CONFIG_SENSORS_HDAPS is not set 847 787 # CONFIG_HWMON_DEBUG_CHIP is not set 848 788 849 789 # ··· 948 886 # USB Device Class drivers 949 887 # 950 888 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set 951 - # CONFIG_USB_BLUETOOTH_TTY is not set 952 889 # CONFIG_USB_ACM is not set 953 890 CONFIG_USB_PRINTER=y 954 891 955 892 # 956 - # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information 893 + # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' 894 + # 895 + 896 + # 897 + # may also be needed; see USB_STORAGE Help for more information 957 898 # 958 899 CONFIG_USB_STORAGE=y 959 900 # CONFIG_USB_STORAGE_DEBUG is not set ··· 989 924 # CONFIG_USB_XPAD is not set 990 925 # CONFIG_USB_ATI_REMOTE is not set 991 926 # CONFIG_USB_KEYSPAN_REMOTE is not set 927 + # CONFIG_USB_APPLETOUCH is not set 992 928 993 929 # 994 930 # USB Imaging devices ··· 1071 1005 # 1072 1006 # CONFIG_EDD is not set 1073 1007 # CONFIG_DELL_RBU is not set 1074 - CONFIG_DCDBAS=m 1008 + # CONFIG_DCDBAS is not set 1075 1009 1076 1010 # 1077 1011 # File systems ··· 1103 1037 # CONFIG_QUOTA is not set 1104 1038 CONFIG_DNOTIFY=y 1105 1039 CONFIG_AUTOFS_FS=y 1106 - # CONFIG_AUTOFS4_FS is not set 1040 + CONFIG_AUTOFS4_FS=y 1107 1041 # CONFIG_FUSE_FS is not set 1108 1042 1109 1043 # ··· 1134 1068 CONFIG_HUGETLBFS=y 1135 1069 CONFIG_HUGETLB_PAGE=y 1136 1070 CONFIG_RAMFS=y 1137 - # CONFIG_RELAYFS_FS is not set 1071 + CONFIG_RELAYFS_FS=y 1138 1072 1139 1073 # 1140 1074 # Miscellaneous filesystems ··· 1252 1186 # CONFIG_DEBUG_KOBJECT is not set 1253 1187 # 
CONFIG_DEBUG_INFO is not set 1254 1188 CONFIG_DEBUG_FS=y 1189 + # CONFIG_DEBUG_VM is not set 1255 1190 # CONFIG_FRAME_POINTER is not set 1191 + # CONFIG_RCU_TORTURE_TEST is not set 1256 1192 CONFIG_INIT_DEBUG=y 1257 1193 # CONFIG_IOMMU_DEBUG is not set 1258 1194 CONFIG_KPROBES=y

-3

arch/x86_64/ia32/ia32_aout.c

··· 36 36 #undef WARN_OLD 37 37 #undef CORE_DUMP /* probably broken */ 38 38 39 - extern int ia32_setup_arg_pages(struct linux_binprm *bprm, 40 - unsigned long stack_top, int exec_stack); 41 - 42 39 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 43 40 static int load_aout_library(struct file*); 44 41

+3 -1

arch/x86_64/ia32/ia32_binfmt.c

··· 335 335 me->thread.es = __USER_DS; 336 336 } 337 337 338 - int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) 338 + int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, 339 + int executable_stack) 339 340 { 340 341 unsigned long stack_base; 341 342 struct vm_area_struct *mpnt; ··· 390 389 391 390 return 0; 392 391 } 392 + EXPORT_SYMBOL(ia32_setup_arg_pages); 393 393 394 394 static unsigned long 395 395 elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)

+1

arch/x86_64/kernel/Makefile

··· 11 11 12 12 obj-$(CONFIG_X86_MCE) += mce.o 13 13 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 14 + obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 14 15 obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ 15 16 obj-$(CONFIG_ACPI) += acpi/ 16 17 obj-$(CONFIG_X86_MSR) += msr.o

+1 -1

arch/x86_64/kernel/aperture.c

··· 196 196 void __init iommu_hole_init(void) 197 197 { 198 198 int fix, num; 199 - u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0; 199 + u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; 200 200 u64 aper_base, last_aper_base = 0; 201 201 int valid_agp = 0; 202 202

+10

arch/x86_64/kernel/apic.c

··· 833 833 return 0; 834 834 } 835 835 836 + #ifdef CONFIG_X86_MCE_AMD 837 + void setup_threshold_lvt(unsigned long lvt_off) 838 + { 839 + unsigned int v = 0; 840 + unsigned long reg = (lvt_off << 4) + 0x500; 841 + v |= THRESHOLD_APIC_VECTOR; 842 + apic_write(reg, v); 843 + } 844 + #endif /* CONFIG_X86_MCE_AMD */ 845 + 836 846 #undef APIC_DIVISOR 837 847 838 848 /*

+1 -2

arch/x86_64/kernel/e820.c

··· 23 23 #include <asm/e820.h> 24 24 #include <asm/proto.h> 25 25 #include <asm/bootsetup.h> 26 - 27 - extern char _end[]; 26 + #include <asm/sections.h> 28 27 29 28 /* 30 29 * PFN of last memory page.

+3

arch/x86_64/kernel/entry.S

··· 612 612 ENTRY(thermal_interrupt) 613 613 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 614 614 615 + ENTRY(threshold_interrupt) 616 + apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 617 + 615 618 #ifdef CONFIG_SMP 616 619 ENTRY(reschedule_interrupt) 617 620 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

+23 -14

arch/x86_64/kernel/head.S

··· 12 12 13 13 #include <linux/linkage.h> 14 14 #include <linux/threads.h> 15 + #include <linux/init.h> 15 16 #include <asm/desc.h> 16 17 #include <asm/segment.h> 17 18 #include <asm/page.h> ··· 71 70 movl %eax, %cr4 72 71 73 72 /* Setup early boot stage 4 level pagetables */ 74 - movl $(init_level4_pgt - __START_KERNEL_map), %eax 73 + movl $(boot_level4_pgt - __START_KERNEL_map), %eax 75 74 movl %eax, %cr3 76 75 77 76 /* Setup EFER (Extended Feature Enable Register) */ ··· 114 113 movq %rax, %cr4 115 114 116 115 /* Setup early boot stage 4 level pagetables. */ 117 - movq $(init_level4_pgt - __START_KERNEL_map), %rax 116 + movq $(boot_level4_pgt - __START_KERNEL_map), %rax 118 117 movq %rax, %cr3 119 118 120 119 /* Check if nx is implemented */ ··· 241 240 ENTRY(stext) 242 241 ENTRY(_stext) 243 242 244 - /* 245 - * This default setting generates an ident mapping at address 0x100000 246 - * and a mapping for the kernel that precisely maps virtual address 247 - * 0xffffffff80000000 to physical address 0x000000. 
(always using 248 - * 2Mbyte large pages provided by PAE mode) 249 - */ 250 243 .org 0x1000 251 244 ENTRY(init_level4_pgt) 252 - .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 253 - .fill 255,8,0 254 - .quad 0x000000000000a007 + __PHYSICAL_START 255 - .fill 254,8,0 256 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 257 - .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 245 + /* This gets initialized in x86_64_start_kernel */ 246 + .fill 512,8,0 258 247 259 248 .org 0x2000 260 249 ENTRY(level3_ident_pgt) ··· 340 349 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 341 350 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 342 351 #endif 352 + 353 + #ifndef CONFIG_HOTPLUG_CPU 354 + __INITDATA 355 + #endif 356 + /* 357 + * This default setting generates an ident mapping at address 0x100000 358 + * and a mapping for the kernel that precisely maps virtual address 359 + * 0xffffffff80000000 to physical address 0x000000. (always using 360 + * 2Mbyte large pages provided by PAE mode) 361 + */ 362 + .align PAGE_SIZE 363 + ENTRY(boot_level4_pgt) 364 + .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 365 + .fill 255,8,0 366 + .quad 0x000000000000a007 + __PHYSICAL_START 367 + .fill 254,8,0 368 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 369 + .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 343 370 344 371 .data 345 372

+10 -4

arch/x86_64/kernel/head64.c

··· 19 19 #include <asm/bootsetup.h> 20 20 #include <asm/setup.h> 21 21 #include <asm/desc.h> 22 + #include <asm/pgtable.h> 23 + #include <asm/sections.h> 22 24 23 25 /* Don't add a printk in there. printk relies on the PDA which is not initialized 24 26 yet. */ 25 27 static void __init clear_bss(void) 26 28 { 27 - extern char __bss_start[], __bss_end[]; 28 29 memset(__bss_start, 0, 29 - (unsigned long) __bss_end - (unsigned long) __bss_start); 30 + (unsigned long) __bss_stop - (unsigned long) __bss_start); 30 31 } 31 32 32 33 #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ ··· 76 75 boot_cpu_data.x86_mask = eax & 0xf; 77 76 } 78 77 79 - extern char _end[]; 80 - 81 78 void __init x86_64_start_kernel(char * real_mode_data) 82 79 { 83 80 char *s; ··· 85 86 set_intr_gate(i, early_idt_handler); 86 87 asm volatile("lidt %0" :: "m" (idt_descr)); 87 88 clear_bss(); 89 + 90 + /* 91 + * switch to init_level4_pgt from boot_level4_pgt 92 + */ 93 + memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); 94 + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 95 + 88 96 pda_init(0); 89 97 copy_bootdata(real_mode_data); 90 98 #ifdef CONFIG_SMP

+2

arch/x86_64/kernel/i8259.c

··· 492 492 void invalidate_interrupt6(void); 493 493 void invalidate_interrupt7(void); 494 494 void thermal_interrupt(void); 495 + void threshold_interrupt(void); 495 496 void i8254_timer_resume(void); 496 497 497 498 static void setup_timer_hardware(void) ··· 581 580 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 582 581 #endif 583 582 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 583 + set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 584 584 585 585 #ifdef CONFIG_X86_LOCAL_APIC 586 586 /* self generated IPI for local APIC timer */

+73 -7

arch/x86_64/kernel/io_apic.c

··· 57 57 * Rough estimation of how many shared IRQs there are, can 58 58 * be changed anytime. 59 59 */ 60 - #define MAX_PLUS_SHARED_IRQS NR_IRQS 60 + #define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS 61 61 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 62 62 63 63 /* ··· 85 85 int pin; \ 86 86 struct irq_pin_list *entry = irq_2_pin + irq; \ 87 87 \ 88 + BUG_ON(irq >= NR_IRQS); \ 88 89 for (;;) { \ 89 90 unsigned int reg; \ 90 91 pin = entry->pin; \ ··· 128 127 } 129 128 #endif 130 129 130 + static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF }; 131 + 131 132 /* 132 133 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 133 134 * shared ISA-space IRQs, so we have to support them. We are super ··· 140 137 static int first_free_entry = NR_IRQS; 141 138 struct irq_pin_list *entry = irq_2_pin + irq; 142 139 140 + BUG_ON(irq >= NR_IRQS); 143 141 while (entry->next) 144 142 entry = irq_2_pin + entry->next; 145 143 ··· 148 144 entry->next = first_free_entry; 149 145 entry = irq_2_pin + entry->next; 150 146 if (++first_free_entry >= PIN_MAP_SIZE) 151 - panic("io_apic.c: whoops"); 147 + panic("io_apic.c: ran out of irq_2_pin entries!"); 152 148 } 153 149 entry->apic = apic; 154 150 entry->pin = pin; ··· 424 420 best_guess = irq; 425 421 } 426 422 } 423 + BUG_ON(best_guess >= NR_IRQS); 427 424 return best_guess; 428 425 } 429 426 ··· 615 610 return MPBIOS_trigger(idx); 616 611 } 617 612 613 + static int next_irq = 16; 614 + 615 + /* 616 + * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ 617 + * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number 618 + * from ACPI, which can reach 800 in large boxen. 619 + * 620 + * Compact the sparse GSI space into a sequential IRQ series and reuse 621 + * vectors if possible. 
622 + */ 623 + int gsi_irq_sharing(int gsi) 624 + { 625 + int i, tries, vector; 626 + 627 + BUG_ON(gsi >= NR_IRQ_VECTORS); 628 + 629 + if (platform_legacy_irq(gsi)) 630 + return gsi; 631 + 632 + if (gsi_2_irq[gsi] != 0xFF) 633 + return (int)gsi_2_irq[gsi]; 634 + 635 + tries = NR_IRQS; 636 + try_again: 637 + vector = assign_irq_vector(gsi); 638 + 639 + /* 640 + * Sharing vectors means sharing IRQs, so scan irq_vectors for previous 641 + * use of vector and if found, return that IRQ. However, we never want 642 + * to share legacy IRQs, which usually have a different trigger mode 643 + * than PCI. 644 + */ 645 + for (i = 0; i < NR_IRQS; i++) 646 + if (IO_APIC_VECTOR(i) == vector) 647 + break; 648 + if (platform_legacy_irq(i)) { 649 + if (--tries >= 0) { 650 + IO_APIC_VECTOR(i) = 0; 651 + goto try_again; 652 + } 653 + panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); 654 + } 655 + if (i < NR_IRQS) { 656 + gsi_2_irq[gsi] = i; 657 + printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", 658 + gsi, vector, i); 659 + return i; 660 + } 661 + 662 + i = next_irq++; 663 + BUG_ON(i >= NR_IRQS); 664 + gsi_2_irq[gsi] = i; 665 + IO_APIC_VECTOR(i) = vector; 666 + printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", 667 + gsi, vector, i); 668 + return i; 669 + } 670 + 618 671 static int pin_2_irq(int idx, int apic, int pin) 619 672 { 620 673 int irq, i; ··· 702 639 while (i < apic) 703 640 irq += nr_ioapic_registers[i++]; 704 641 irq += pin; 642 + irq = gsi_irq_sharing(irq); 705 643 break; 706 644 } 707 645 default: ··· 712 648 break; 713 649 } 714 650 } 651 + BUG_ON(irq >= NR_IRQS); 715 652 716 653 /* 717 654 * PCI IRQ command line redirection. Yes, limits are hardcoded. 
··· 728 663 } 729 664 } 730 665 } 666 + BUG_ON(irq >= NR_IRQS); 731 667 return irq; 732 668 } 733 669 ··· 756 690 { 757 691 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 758 692 759 - BUG_ON(irq >= NR_IRQ_VECTORS); 760 - if (IO_APIC_VECTOR(irq) > 0) 693 + BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); 694 + if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) 761 695 return IO_APIC_VECTOR(irq); 762 696 next: 763 697 current_vector += 8; ··· 765 699 goto next; 766 700 767 701 if (current_vector >= FIRST_SYSTEM_VECTOR) { 768 - offset++; 769 - if (!(offset%8)) 770 - return -ENOSPC; 702 + /* If we run out of vectors on large boxen, must share them. */ 703 + offset = (offset + 1) % 8; 771 704 current_vector = FIRST_DEVICE_VECTOR + offset; 772 705 } 773 706 ··· 1982 1917 entry.polarity = active_high_low; 1983 1918 entry.mask = 1; /* Disabled (masked) */ 1984 1919 1920 + irq = gsi_irq_sharing(irq); 1985 1921 /* 1986 1922 * IRQs < 16 are already in the irq_2_pin[] map 1987 1923 */

+12 -5

arch/x86_64/kernel/mce.c

··· 37 37 static unsigned long console_logged; 38 38 static int notify_user; 39 39 static int rip_msr; 40 - static int mce_bootlog; 40 + static int mce_bootlog = 1; 41 41 42 42 /* 43 43 * Lockless MCE logging infrastructure. ··· 347 347 /* disable GART TBL walk error reporting, which trips off 348 348 incorrectly with the IOMMU & 3ware & Cerberus. */ 349 349 clear_bit(10, &bank[4]); 350 + /* Lots of broken BIOS around that don't clear them 351 + by default and leave crap in there. Don't log. */ 352 + mce_bootlog = 0; 350 353 } 354 + 351 355 } 352 356 353 357 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) ··· 359 355 switch (c->x86_vendor) { 360 356 case X86_VENDOR_INTEL: 361 357 mce_intel_feature_init(c); 358 + break; 359 + case X86_VENDOR_AMD: 360 + mce_amd_feature_init(c); 362 361 break; 363 362 default: 364 363 break; ··· 502 495 /* mce=off disables machine check. Note you can reenable it later 503 496 using sysfs. 504 497 mce=TOLERANCELEVEL (number, see above) 505 - mce=bootlog Log MCEs from before booting. Disabled by default to work 506 - around buggy BIOS that leave bogus MCEs. */ 498 + mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 499 + mce=nobootlog Don't log MCEs from before booting. */ 507 500 static int __init mcheck_enable(char *str) 508 501 { 509 502 if (*str == '=') 510 503 str++; 511 504 if (!strcmp(str, "off")) 512 505 mce_dont_init = 1; 513 - else if (!strcmp(str, "bootlog")) 514 - mce_bootlog = 1; 506 + else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) 507 + mce_bootlog = str[0] == 'b'; 515 508 else if (isdigit(str[0])) 516 509 get_option(&str, &tolerant); 517 510 else

+538

arch/x86_64/kernel/mce_amd.c

··· 1 + /* 2 + * (c) 2005 Advanced Micro Devices, Inc. 3 + * Your use of this code is subject to the terms and conditions of the 4 + * GNU general public license version 2. See "COPYING" or 5 + * http://www.gnu.org/licenses/gpl.html 6 + * 7 + * Written by Jacob Shin - AMD, Inc. 8 + * 9 + * Support : jacob.shin@amd.com 10 + * 11 + * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. 12 + * MC4_MISC0 exists per physical processor. 13 + * 14 + */ 15 + 16 + #include <linux/cpu.h> 17 + #include <linux/errno.h> 18 + #include <linux/init.h> 19 + #include <linux/interrupt.h> 20 + #include <linux/kobject.h> 21 + #include <linux/notifier.h> 22 + #include <linux/sched.h> 23 + #include <linux/smp.h> 24 + #include <linux/sysdev.h> 25 + #include <linux/sysfs.h> 26 + #include <asm/apic.h> 27 + #include <asm/mce.h> 28 + #include <asm/msr.h> 29 + #include <asm/percpu.h> 30 + 31 + #define PFX "mce_threshold: " 32 + #define VERSION "version 1.00.9" 33 + #define NR_BANKS 5 34 + #define THRESHOLD_MAX 0xFFF 35 + #define INT_TYPE_APIC 0x00020000 36 + #define MASK_VALID_HI 0x80000000 37 + #define MASK_LVTOFF_HI 0x00F00000 38 + #define MASK_COUNT_EN_HI 0x00080000 39 + #define MASK_INT_TYPE_HI 0x00060000 40 + #define MASK_OVERFLOW_HI 0x00010000 41 + #define MASK_ERR_COUNT_HI 0x00000FFF 42 + #define MASK_OVERFLOW 0x0001000000000000L 43 + 44 + struct threshold_bank { 45 + unsigned int cpu; 46 + u8 bank; 47 + u8 interrupt_enable; 48 + u16 threshold_limit; 49 + struct kobject kobj; 50 + }; 51 + 52 + static struct threshold_bank threshold_defaults = { 53 + .interrupt_enable = 0, 54 + .threshold_limit = THRESHOLD_MAX, 55 + }; 56 + 57 + #ifdef CONFIG_SMP 58 + static unsigned char shared_bank[NR_BANKS] = { 59 + 0, 0, 0, 0, 1 60 + }; 61 + #endif 62 + 63 + static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 64 + 65 + /* 66 + * CPU Initialization 67 + */ 68 + 69 + /* must be called with correct cpu affinity */ 70 + static void threshold_restart_bank(struct 
threshold_bank *b, 71 + int reset, u16 old_limit) 72 + { 73 + u32 mci_misc_hi, mci_misc_lo; 74 + 75 + rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 76 + 77 + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 78 + reset = 1; /* limit cannot be lower than err count */ 79 + 80 + if (reset) { /* reset err count and overflow bit */ 81 + mci_misc_hi = 82 + (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 83 + (THRESHOLD_MAX - b->threshold_limit); 84 + } else if (old_limit) { /* change limit w/o reset */ 85 + int new_count = (mci_misc_hi & THRESHOLD_MAX) + 86 + (old_limit - b->threshold_limit); 87 + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 88 + (new_count & THRESHOLD_MAX); 89 + } 90 + 91 + b->interrupt_enable ? 92 + (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 93 + (mci_misc_hi &= ~MASK_INT_TYPE_HI); 94 + 95 + mci_misc_hi |= MASK_COUNT_EN_HI; 96 + wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); 97 + } 98 + 99 + void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 100 + { 101 + int bank; 102 + u32 mci_misc_lo, mci_misc_hi; 103 + unsigned int cpu = smp_processor_id(); 104 + 105 + for (bank = 0; bank < NR_BANKS; ++bank) { 106 + rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); 107 + 108 + /* !valid, !counter present, bios locked */ 109 + if (!(mci_misc_hi & MASK_VALID_HI) || 110 + !(mci_misc_hi & MASK_VALID_HI >> 1) || 111 + (mci_misc_hi & MASK_VALID_HI >> 2)) 112 + continue; 113 + 114 + per_cpu(bank_map, cpu) |= (1 << bank); 115 + 116 + #ifdef CONFIG_SMP 117 + if (shared_bank[bank] && cpu_core_id[cpu]) 118 + continue; 119 + #endif 120 + 121 + setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); 122 + threshold_defaults.cpu = cpu; 123 + threshold_defaults.bank = bank; 124 + threshold_restart_bank(&threshold_defaults, 0, 0); 125 + } 126 + } 127 + 128 + /* 129 + * APIC Interrupt Handler 130 + */ 131 + 132 + /* 133 + * threshold interrupt handler will service 
THRESHOLD_APIC_VECTOR. 134 + * the interrupt goes off when error_count reaches threshold_limit. 135 + * the handler will simply log mcelog w/ software defined bank number. 136 + */ 137 + asmlinkage void mce_threshold_interrupt(void) 138 + { 139 + int bank; 140 + struct mce m; 141 + 142 + ack_APIC_irq(); 143 + irq_enter(); 144 + 145 + memset(&m, 0, sizeof(m)); 146 + rdtscll(m.tsc); 147 + m.cpu = smp_processor_id(); 148 + 149 + /* assume first bank caused it */ 150 + for (bank = 0; bank < NR_BANKS; ++bank) { 151 + m.bank = MCE_THRESHOLD_BASE + bank; 152 + rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); 153 + 154 + if (m.misc & MASK_OVERFLOW) { 155 + mce_log(&m); 156 + goto out; 157 + } 158 + } 159 + out: 160 + irq_exit(); 161 + } 162 + 163 + /* 164 + * Sysfs Interface 165 + */ 166 + 167 + static struct sysdev_class threshold_sysclass = { 168 + set_kset_name("threshold"), 169 + }; 170 + 171 + static DEFINE_PER_CPU(struct sys_device, device_threshold); 172 + 173 + struct threshold_attr { 174 + struct attribute attr; 175 + ssize_t(*show) (struct threshold_bank *, char *); 176 + ssize_t(*store) (struct threshold_bank *, const char *, size_t count); 177 + }; 178 + 179 + static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 180 + 181 + static cpumask_t affinity_set(unsigned int cpu) 182 + { 183 + cpumask_t oldmask = current->cpus_allowed; 184 + cpumask_t newmask = CPU_MASK_NONE; 185 + cpu_set(cpu, newmask); 186 + set_cpus_allowed(current, newmask); 187 + return oldmask; 188 + } 189 + 190 + static void affinity_restore(cpumask_t oldmask) 191 + { 192 + set_cpus_allowed(current, oldmask); 193 + } 194 + 195 + #define SHOW_FIELDS(name) \ 196 + static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ 197 + { \ 198 + return sprintf(buf, "%lx\n", (unsigned long) b->name); \ 199 + } 200 + SHOW_FIELDS(interrupt_enable) 201 + SHOW_FIELDS(threshold_limit) 202 + 203 + static ssize_t store_interrupt_enable(struct threshold_bank *b, 204 + const char *buf, 
size_t count) 205 + { 206 + char *end; 207 + cpumask_t oldmask; 208 + unsigned long new = simple_strtoul(buf, &end, 0); 209 + if (end == buf) 210 + return -EINVAL; 211 + b->interrupt_enable = !!new; 212 + 213 + oldmask = affinity_set(b->cpu); 214 + threshold_restart_bank(b, 0, 0); 215 + affinity_restore(oldmask); 216 + 217 + return end - buf; 218 + } 219 + 220 + static ssize_t store_threshold_limit(struct threshold_bank *b, 221 + const char *buf, size_t count) 222 + { 223 + char *end; 224 + cpumask_t oldmask; 225 + u16 old; 226 + unsigned long new = simple_strtoul(buf, &end, 0); 227 + if (end == buf) 228 + return -EINVAL; 229 + if (new > THRESHOLD_MAX) 230 + new = THRESHOLD_MAX; 231 + if (new < 1) 232 + new = 1; 233 + old = b->threshold_limit; 234 + b->threshold_limit = new; 235 + 236 + oldmask = affinity_set(b->cpu); 237 + threshold_restart_bank(b, 0, old); 238 + affinity_restore(oldmask); 239 + 240 + return end - buf; 241 + } 242 + 243 + static ssize_t show_error_count(struct threshold_bank *b, char *buf) 244 + { 245 + u32 high, low; 246 + cpumask_t oldmask; 247 + oldmask = affinity_set(b->cpu); 248 + rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ 249 + affinity_restore(oldmask); 250 + return sprintf(buf, "%x\n", 251 + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 252 + } 253 + 254 + static ssize_t store_error_count(struct threshold_bank *b, 255 + const char *buf, size_t count) 256 + { 257 + cpumask_t oldmask; 258 + oldmask = affinity_set(b->cpu); 259 + threshold_restart_bank(b, 1, 0); 260 + affinity_restore(oldmask); 261 + return 1; 262 + } 263 + 264 + #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ 265 + .attr = {.name = __stringify(_name), .mode = _mode }, \ 266 + .show = _show, \ 267 + .store = _store, \ 268 + }; 269 + 270 + #define ATTR_FIELDS(name) \ 271 + static struct threshold_attr name = \ 272 + THRESHOLD_ATTR(name, 0644, show_## name, store_## name) 273 + 274 + ATTR_FIELDS(interrupt_enable); 275 + 
ATTR_FIELDS(threshold_limit); 276 + ATTR_FIELDS(error_count); 277 + 278 + static struct attribute *default_attrs[] = { 279 + &interrupt_enable.attr, 280 + &threshold_limit.attr, 281 + &error_count.attr, 282 + NULL 283 + }; 284 + 285 + #define to_bank(k) container_of(k,struct threshold_bank,kobj) 286 + #define to_attr(a) container_of(a,struct threshold_attr,attr) 287 + 288 + static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 289 + { 290 + struct threshold_bank *b = to_bank(kobj); 291 + struct threshold_attr *a = to_attr(attr); 292 + ssize_t ret; 293 + ret = a->show ? a->show(b, buf) : -EIO; 294 + return ret; 295 + } 296 + 297 + static ssize_t store(struct kobject *kobj, struct attribute *attr, 298 + const char *buf, size_t count) 299 + { 300 + struct threshold_bank *b = to_bank(kobj); 301 + struct threshold_attr *a = to_attr(attr); 302 + ssize_t ret; 303 + ret = a->store ? a->store(b, buf, count) : -EIO; 304 + return ret; 305 + } 306 + 307 + static struct sysfs_ops threshold_ops = { 308 + .show = show, 309 + .store = store, 310 + }; 311 + 312 + static struct kobj_type threshold_ktype = { 313 + .sysfs_ops = &threshold_ops, 314 + .default_attrs = default_attrs, 315 + }; 316 + 317 + /* symlinks sibling shared banks to first core. first core owns dir/files. 
*/ 318 + static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) 319 + { 320 + int err = 0; 321 + struct threshold_bank *b = 0; 322 + 323 + #ifdef CONFIG_SMP 324 + if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ 325 + char name[16]; 326 + unsigned lcpu = first_cpu(cpu_core_map[cpu]); 327 + if (cpu_core_id[lcpu]) 328 + goto out; /* first core not up yet */ 329 + 330 + b = per_cpu(threshold_banks, lcpu)[bank]; 331 + if (!b) 332 + goto out; 333 + sprintf(name, "bank%i", bank); 334 + err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, 335 + &b->kobj, name); 336 + if (err) 337 + goto out; 338 + per_cpu(threshold_banks, cpu)[bank] = b; 339 + goto out; 340 + } 341 + #endif 342 + 343 + b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); 344 + if (!b) { 345 + err = -ENOMEM; 346 + goto out; 347 + } 348 + memset(b, 0, sizeof(struct threshold_bank)); 349 + 350 + b->cpu = cpu; 351 + b->bank = bank; 352 + b->interrupt_enable = 0; 353 + b->threshold_limit = THRESHOLD_MAX; 354 + kobject_set_name(&b->kobj, "bank%i", bank); 355 + b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; 356 + b->kobj.ktype = &threshold_ktype; 357 + 358 + err = kobject_register(&b->kobj); 359 + if (err) { 360 + kfree(b); 361 + goto out; 362 + } 363 + per_cpu(threshold_banks, cpu)[bank] = b; 364 + out: 365 + return err; 366 + } 367 + 368 + /* create dir/files for all valid threshold banks */ 369 + static __cpuinit int threshold_create_device(unsigned int cpu) 370 + { 371 + int bank; 372 + int err = 0; 373 + 374 + per_cpu(device_threshold, cpu).id = cpu; 375 + per_cpu(device_threshold, cpu).cls = &threshold_sysclass; 376 + err = sysdev_register(&per_cpu(device_threshold, cpu)); 377 + if (err) 378 + goto out; 379 + 380 + for (bank = 0; bank < NR_BANKS; ++bank) { 381 + if (!(per_cpu(bank_map, cpu) & 1 << bank)) 382 + continue; 383 + err = threshold_create_bank(cpu, bank); 384 + if (err) 385 + goto out; 386 + } 387 + out: 388 + return err; 389 + } 390 + 391 + #ifdef 
CONFIG_HOTPLUG_CPU 392 + /* 393 + * let's be hotplug friendly. 394 + * in case of multiple core processors, the first core always takes ownership 395 + * of shared sysfs dir/files, and rest of the cores will be symlinked to it. 396 + */ 397 + 398 + /* cpu hotplug call removes all symlinks before first core dies */ 399 + static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) 400 + { 401 + struct threshold_bank *b; 402 + char name[16]; 403 + 404 + b = per_cpu(threshold_banks, cpu)[bank]; 405 + if (!b) 406 + return; 407 + if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { 408 + sprintf(name, "bank%i", bank); 409 + sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); 410 + per_cpu(threshold_banks, cpu)[bank] = 0; 411 + } else { 412 + kobject_unregister(&b->kobj); 413 + kfree(per_cpu(threshold_banks, cpu)[bank]); 414 + } 415 + } 416 + 417 + static __cpuinit void threshold_remove_device(unsigned int cpu) 418 + { 419 + int bank; 420 + 421 + for (bank = 0; bank < NR_BANKS; ++bank) { 422 + if (!(per_cpu(bank_map, cpu) & 1 << bank)) 423 + continue; 424 + threshold_remove_bank(cpu, bank); 425 + } 426 + sysdev_unregister(&per_cpu(device_threshold, cpu)); 427 + } 428 + 429 + /* link all existing siblings when first core comes up */ 430 + static __cpuinit int threshold_create_symlinks(unsigned int cpu) 431 + { 432 + int bank, err = 0; 433 + unsigned int lcpu = 0; 434 + 435 + if (cpu_core_id[cpu]) 436 + return 0; 437 + for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { 438 + if (lcpu == cpu) 439 + continue; 440 + for (bank = 0; bank < NR_BANKS; ++bank) { 441 + if (!(per_cpu(bank_map, cpu) & 1 << bank)) 442 + continue; 443 + if (!shared_bank[bank]) 444 + continue; 445 + err = threshold_create_bank(lcpu, bank); 446 + } 447 + } 448 + return err; 449 + } 450 + 451 + /* remove all symlinks before first core dies. 
*/ 452 + static __cpuinit void threshold_remove_symlinks(unsigned int cpu) 453 + { 454 + int bank; 455 + unsigned int lcpu = 0; 456 + if (cpu_core_id[cpu]) 457 + return; 458 + for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { 459 + if (lcpu == cpu) 460 + continue; 461 + for (bank = 0; bank < NR_BANKS; ++bank) { 462 + if (!(per_cpu(bank_map, cpu) & 1 << bank)) 463 + continue; 464 + if (!shared_bank[bank]) 465 + continue; 466 + threshold_remove_bank(lcpu, bank); 467 + } 468 + } 469 + } 470 + #else /* !CONFIG_HOTPLUG_CPU */ 471 + static __cpuinit void threshold_create_symlinks(unsigned int cpu) 472 + { 473 + } 474 + static __cpuinit void threshold_remove_symlinks(unsigned int cpu) 475 + { 476 + } 477 + static void threshold_remove_device(unsigned int cpu) 478 + { 479 + } 480 + #endif 481 + 482 + /* get notified when a cpu comes on/off */ 483 + static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, 484 + unsigned long action, void *hcpu) 485 + { 486 + /* cpu was unsigned int to begin with */ 487 + unsigned int cpu = (unsigned long)hcpu; 488 + 489 + if (cpu >= NR_CPUS) 490 + goto out; 491 + 492 + switch (action) { 493 + case CPU_ONLINE: 494 + threshold_create_device(cpu); 495 + threshold_create_symlinks(cpu); 496 + break; 497 + case CPU_DOWN_PREPARE: 498 + threshold_remove_symlinks(cpu); 499 + break; 500 + case CPU_DOWN_FAILED: 501 + threshold_create_symlinks(cpu); 502 + break; 503 + case CPU_DEAD: 504 + threshold_remove_device(cpu); 505 + break; 506 + default: 507 + break; 508 + } 509 + out: 510 + return NOTIFY_OK; 511 + } 512 + 513 + static struct notifier_block threshold_cpu_notifier = { 514 + .notifier_call = threshold_cpu_callback, 515 + }; 516 + 517 + static __init int threshold_init_device(void) 518 + { 519 + int err; 520 + int lcpu = 0; 521 + 522 + err = sysdev_class_register(&threshold_sysclass); 523 + if (err) 524 + goto out; 525 + 526 + /* to hit CPUs online before the notifier is up */ 527 + for_each_online_cpu(lcpu) { 528 + err = 
threshold_create_device(lcpu); 529 + if (err) 530 + goto out; 531 + } 532 + register_cpu_notifier(&threshold_cpu_notifier); 533 + 534 + out: 535 + return err; 536 + } 537 + 538 + device_initcall(threshold_init_device);

+15 -8

arch/x86_64/kernel/mpparse.c

··· 42 42 * Various Linux-internal data structures created from the 43 43 * MP-table. 44 44 */ 45 - int apic_version [MAX_APICS]; 45 + unsigned char apic_version [MAX_APICS]; 46 46 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 47 47 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 48 48 ··· 65 65 /* Processor that is doing the boot up */ 66 66 unsigned int boot_cpu_id = -1U; 67 67 /* Internal processor count */ 68 - static unsigned int num_processors = 0; 68 + unsigned int num_processors __initdata = 0; 69 + 70 + unsigned disabled_cpus __initdata; 69 71 70 72 /* Bitmask of physically existing CPUs */ 71 73 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; ··· 108 106 109 107 static void __init MP_processor_info (struct mpc_config_processor *m) 110 108 { 111 - int ver, cpu; 109 + int cpu; 110 + unsigned char ver; 112 111 static int found_bsp=0; 113 112 114 - if (!(m->mpc_cpuflag & CPU_ENABLED)) 113 + if (!(m->mpc_cpuflag & CPU_ENABLED)) { 114 + disabled_cpus++; 115 115 return; 116 + } 116 117 117 118 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", 118 119 m->mpc_apicid, ··· 134 129 } 135 130 136 131 cpu = num_processors++; 137 - 138 - if (m->mpc_apicid > MAX_APICS) { 132 + 133 + #if MAX_APICS < 255 134 + if ((int)m->mpc_apicid > MAX_APICS) { 139 135 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 140 136 m->mpc_apicid, MAX_APICS); 141 137 return; 142 138 } 139 + #endif 143 140 ver = m->mpc_apicver; 144 141 145 142 physid_set(m->mpc_apicid, phys_cpu_present_map); ··· 225 218 m->mpc_irqtype, m->mpc_irqflag & 3, 226 219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 227 220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 228 - if (++mp_irq_entries == MAX_IRQ_SOURCES) 221 + if (++mp_irq_entries >= MAX_IRQ_SOURCES) 229 222 panic("Max # of irq sources exceeded!!\n"); 230 223 } 231 224 ··· 556 549 * Read the physical hardware table. Anything here will 557 550 * override the defaults. 
558 551 */ 559 - if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { 552 + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { 560 553 smp_found_config = 0; 561 554 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); 562 555 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");

+7 -1

arch/x86_64/kernel/pci-gart.c

··· 220 220 uses the normal dma_mask for alloc_coherent. */ 221 221 dma_mask &= *dev->dma_mask; 222 222 223 + /* Why <=? Even when the mask is smaller than 4GB it is often larger 224 + than 16MB and in this case we have a chance of finding fitting memory 225 + in the next higher zone first. If not retry with true GFP_DMA. -AK */ 226 + if (dma_mask <= 0xffffffff) 227 + gfp |= GFP_DMA32; 228 + 223 229 again: 224 230 memory = dma_alloc_pages(dev, gfp, get_order(size)); 225 231 if (memory == NULL) ··· 251 245 } 252 246 253 247 if (!(gfp & GFP_DMA)) { 254 - gfp |= GFP_DMA; 248 + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; 255 249 goto again; 256 250 } 257 251 return NULL;

+27 -20

arch/x86_64/kernel/process.c

··· 144 144 do { 145 145 ssleep(1); 146 146 for_each_online_cpu(cpu) { 147 - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) 147 + if (cpu_isset(cpu, map) && 148 + !per_cpu(cpu_idle_state, cpu)) 148 149 cpu_clear(cpu, map); 149 150 } 150 151 cpus_and(map, map, cpu_online_map); ··· 276 275 system_utsname.version); 277 276 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 278 277 printk_address(regs->rip); 279 - printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); 278 + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, 279 + regs->eflags); 280 280 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 281 281 regs->rax, regs->rbx, regs->rcx); 282 282 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", ··· 429 427 struct pt_regs * childregs; 430 428 struct task_struct *me = current; 431 429 432 - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; 433 - 430 + childregs = ((struct pt_regs *) 431 + (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; 434 432 *childregs = *regs; 435 433 436 434 childregs->rax = 0; 437 435 childregs->rsp = rsp; 438 - if (rsp == ~0UL) { 436 + if (rsp == ~0UL) 439 437 childregs->rsp = (unsigned long)childregs; 440 - } 441 438 442 439 p->thread.rsp = (unsigned long) childregs; 443 440 p->thread.rsp0 = (unsigned long) (childregs+1); ··· 458 457 p->thread.io_bitmap_max = 0; 459 458 return -ENOMEM; 460 459 } 461 - memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); 460 + memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, 461 + IO_BITMAP_BYTES); 462 462 } 463 463 464 464 /* ··· 496 494 * - fold all the options into a flag word and test it with a single test. 
497 495 * - could test fs/gs bitsliced 498 496 */ 499 - struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 497 + struct task_struct * 498 + __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 500 499 { 501 500 struct thread_struct *prev = &prev_p->thread, 502 501 *next = &next_p->thread; ··· 568 565 prev->userrsp = read_pda(oldrsp); 569 566 write_pda(oldrsp, next->userrsp); 570 567 write_pda(pcurrent, next_p); 571 - write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); 568 + write_pda(kernelstack, 569 + (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); 572 570 573 571 /* 574 572 * Now maybe reload the debug registers ··· 650 646 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); 651 647 } 652 648 653 - asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 649 + asmlinkage long 650 + sys_clone(unsigned long clone_flags, unsigned long newsp, 651 + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 654 652 { 655 653 if (!newsp) 656 654 newsp = regs->rsp; ··· 688 682 return 0; 689 683 fp = *(u64 *)(p->thread.rsp); 690 684 do { 691 - if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) 685 + if (fp < (unsigned long)stack || 686 + fp > (unsigned long)stack+THREAD_SIZE) 692 687 return 0; 693 688 rip = *(u64 *)(fp+8); 694 689 if (!in_sched_functions(rip)) ··· 724 717 task->thread.gsindex = 0; 725 718 task->thread.gs = addr; 726 719 if (doit) { 727 - load_gs_index(0); 728 - ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 720 + load_gs_index(0); 721 + ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 729 722 } 730 723 } 731 724 put_cpu(); ··· 742 735 set_32bit_tls(task, FS_TLS, addr); 743 736 if (doit) { 744 737 load_TLS(&task->thread, cpu); 745 - asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); 738 + asm volatile("movl %0,%%fs" :: 
"r"(FS_TLS_SEL)); 746 739 } 747 740 task->thread.fsindex = FS_TLS_SEL; 748 741 task->thread.fs = 0; ··· 752 745 if (doit) { 753 746 /* set the selector to 0 to not confuse 754 747 __switch_to */ 755 - asm volatile("movl %0,%%fs" :: "r" (0)); 756 - ret = checking_wrmsrl(MSR_FS_BASE, addr); 748 + asm volatile("movl %0,%%fs" :: "r" (0)); 749 + ret = checking_wrmsrl(MSR_FS_BASE, addr); 757 750 } 758 751 } 759 752 put_cpu(); ··· 762 755 unsigned long base; 763 756 if (task->thread.fsindex == FS_TLS_SEL) 764 757 base = read_32bit_tls(task, FS_TLS); 765 - else if (doit) { 758 + else if (doit) 766 759 rdmsrl(MSR_FS_BASE, base); 767 - } else 760 + else 768 761 base = task->thread.fs; 769 762 ret = put_user(base, (unsigned long __user *)addr); 770 763 break; ··· 773 766 unsigned long base; 774 767 if (task->thread.gsindex == GS_TLS_SEL) 775 768 base = read_32bit_tls(task, GS_TLS); 776 - else if (doit) { 769 + else if (doit) 777 770 rdmsrl(MSR_KERNEL_GS_BASE, base); 778 - } else 771 + else 779 772 base = task->thread.gs; 780 773 ret = put_user(base, (unsigned long __user *)addr); 781 774 break;

+4 -3

arch/x86_64/kernel/reboot.c

··· 77 77 78 78 void machine_shutdown(void) 79 79 { 80 + unsigned long flags; 80 81 /* Stop the cpus and apics */ 81 82 #ifdef CONFIG_SMP 82 83 int reboot_cpu_id; ··· 99 98 smp_send_stop(); 100 99 #endif 101 100 102 - local_irq_disable(); 101 + local_irq_save(flags); 103 102 104 103 #ifndef CONFIG_SMP 105 104 disable_local_APIC(); ··· 107 106 108 107 disable_IO_APIC(); 109 108 110 - local_irq_enable(); 109 + local_irq_restore(flags); 111 110 } 112 111 113 112 void machine_emergency_restart(void) ··· 121 120 /* Could also try the reset bit in the Hammer NB */ 122 121 switch (reboot_type) { 123 122 case BOOT_KBD: 124 - for (i=0; i<100; i++) { 123 + for (i=0; i<10; i++) { 125 124 kb_wait(); 126 125 udelay(50); 127 126 outb(0xfe,0x64); /* pulse reset low */

+41 -48

arch/x86_64/kernel/setup.c

··· 60 60 #include <asm/setup.h> 61 61 #include <asm/mach_apic.h> 62 62 #include <asm/numa.h> 63 + #include <asm/sections.h> 63 64 64 65 /* 65 66 * Machine setup.. ··· 104 103 struct e820map e820; 105 104 106 105 extern int root_mountflags; 107 - extern char _text, _etext, _edata, _end; 108 106 109 107 char command_line[COMMAND_LINE_SIZE]; 110 108 ··· 412 412 { 413 413 unsigned long bootmap_size, bootmap; 414 414 415 - memory_present(0, start_pfn, end_pfn); 416 415 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 417 416 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); 418 417 if (bootmap == -1L) ··· 570 571 571 572 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 572 573 574 + zap_low_mappings(0); 575 + 573 576 #ifdef CONFIG_ACPI 574 577 /* 575 578 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). ··· 657 656 crashk_res.end - crashk_res.start + 1); 658 657 } 659 658 #endif 660 - 661 - sparse_init(); 662 659 663 660 paging_init(); 664 661 ··· 792 793 #endif 793 794 794 795 bits = 0; 795 - while ((1 << bits) < c->x86_num_cores) 796 + while ((1 << bits) < c->x86_max_cores) 796 797 bits++; 797 798 798 799 /* Low order bits define the core id (index of core in socket) */ ··· 822 823 if (!node_online(node)) 823 824 node = nearby_node(apicid); 824 825 } 825 - cpu_to_node[cpu] = node; 826 + numa_set_node(cpu, node); 826 827 827 828 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 828 - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 829 + cpu, c->x86_max_cores, node, cpu_core_id[cpu]); 829 830 #endif 830 831 #endif 831 832 } ··· 874 875 display_cacheinfo(c); 875 876 876 877 if (c->extended_cpuid_level >= 0x80000008) { 877 - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 878 - if (c->x86_num_cores & (c->x86_num_cores - 1)) 879 - c->x86_num_cores = 1; 878 + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 879 + if (c->x86_max_cores & (c->x86_max_cores - 1)) 880 + c->x86_max_cores = 1; 880 881 881 882 
amd_detect_cmp(c); 882 883 } ··· 888 889 { 889 890 #ifdef CONFIG_SMP 890 891 u32 eax, ebx, ecx, edx; 891 - int index_msb, tmp; 892 + int index_msb, core_bits; 892 893 int cpu = smp_processor_id(); 893 - 894 + 895 + cpuid(1, &eax, &ebx, &ecx, &edx); 896 + 897 + c->apicid = phys_pkg_id(0); 898 + 894 899 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 895 900 return; 896 901 897 - cpuid(1, &eax, &ebx, &ecx, &edx); 898 902 smp_num_siblings = (ebx & 0xff0000) >> 16; 899 - 903 + 900 904 if (smp_num_siblings == 1) { 901 905 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 902 - } else if (smp_num_siblings > 1) { 903 - index_msb = 31; 904 - /* 905 - * At this point we only support two siblings per 906 - * processor package. 907 - */ 906 + } else if (smp_num_siblings > 1 ) { 907 + 908 908 if (smp_num_siblings > NR_CPUS) { 909 909 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 910 910 smp_num_siblings = 1; 911 911 return; 912 912 } 913 - tmp = smp_num_siblings; 914 - while ((tmp & 0x80000000 ) == 0) { 915 - tmp <<=1 ; 916 - index_msb--; 917 - } 918 - if (smp_num_siblings & (smp_num_siblings - 1)) 919 - index_msb++; 913 + 914 + index_msb = get_count_order(smp_num_siblings); 920 915 phys_proc_id[cpu] = phys_pkg_id(index_msb); 921 - 916 + 922 917 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 923 918 phys_proc_id[cpu]); 924 919 925 - smp_num_siblings = smp_num_siblings / c->x86_num_cores; 920 + smp_num_siblings = smp_num_siblings / c->x86_max_cores; 926 921 927 - tmp = smp_num_siblings; 928 - index_msb = 31; 929 - while ((tmp & 0x80000000) == 0) { 930 - tmp <<=1 ; 931 - index_msb--; 932 - } 933 - if (smp_num_siblings & (smp_num_siblings - 1)) 934 - index_msb++; 922 + index_msb = get_count_order(smp_num_siblings) ; 935 923 936 - cpu_core_id[cpu] = phys_pkg_id(index_msb); 924 + core_bits = get_count_order(c->x86_max_cores); 937 925 938 - if (c->x86_num_cores > 1) 926 + cpu_core_id[cpu] = 
phys_pkg_id(index_msb) & 927 + ((1 << core_bits) - 1); 928 + 929 + if (c->x86_max_cores > 1) 939 930 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 940 931 cpu_core_id[cpu]); 941 932 } ··· 964 975 node = apicid_to_node[hard_smp_processor_id()]; 965 976 if (node == NUMA_NO_NODE) 966 977 node = 0; 967 - cpu_to_node[cpu] = node; 978 + numa_set_node(cpu, node); 968 979 969 980 if (acpi_numa > 0) 970 981 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); ··· 982 993 unsigned eax = cpuid_eax(0x80000008); 983 994 c->x86_virt_bits = (eax >> 8) & 0xff; 984 995 c->x86_phys_bits = eax & 0xff; 996 + /* CPUID workaround for Intel 0F34 CPU */ 997 + if (c->x86_vendor == X86_VENDOR_INTEL && 998 + c->x86 == 0xF && c->x86_model == 0x3 && 999 + c->x86_mask == 0x4) 1000 + c->x86_phys_bits = 36; 985 1001 } 986 1002 987 1003 if (c->x86 == 15) 988 1004 c->x86_cache_alignment = c->x86_clflush_size * 2; 989 1005 if (c->x86 >= 15) 990 1006 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 991 - c->x86_num_cores = intel_num_cpu_cores(c); 1007 + c->x86_max_cores = intel_num_cpu_cores(c); 992 1008 993 1009 srat_detect_node(); 994 1010 } ··· 1031 1037 c->x86_model_id[0] = '\0'; /* Unset */ 1032 1038 c->x86_clflush_size = 64; 1033 1039 c->x86_cache_alignment = c->x86_clflush_size; 1034 - c->x86_num_cores = 1; 1040 + c->x86_max_cores = 1; 1035 1041 c->extended_cpuid_level = 0; 1036 1042 memset(&c->x86_capability, 0, sizeof c->x86_capability); 1037 1043 ··· 1054 1060 c->x86 = (tfms >> 8) & 0xf; 1055 1061 c->x86_model = (tfms >> 4) & 0xf; 1056 1062 c->x86_mask = tfms & 0xf; 1057 - if (c->x86 == 0xf) { 1063 + if (c->x86 == 0xf) 1058 1064 c->x86 += (tfms >> 20) & 0xff; 1065 + if (c->x86 >= 0x6) 1059 1066 c->x86_model += ((tfms >> 16) & 0xF) << 4; 1060 - } 1061 1067 if (c->x86_capability[0] & (1<<19)) 1062 1068 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 1063 1069 } else { ··· 1265 1271 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 1266 1272 1267 1273 #ifdef CONFIG_SMP 1268 - 
if (smp_num_siblings * c->x86_num_cores > 1) { 1274 + if (smp_num_siblings * c->x86_max_cores > 1) { 1269 1275 int cpu = c - cpu_data; 1270 1276 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1271 - seq_printf(m, "siblings\t: %d\n", 1272 - c->x86_num_cores * smp_num_siblings); 1277 + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); 1273 1278 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1274 - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 1279 + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 1275 1280 } 1276 1281 #endif 1277 1282

+1 -1

arch/x86_64/kernel/setup64.c

··· 141 141 panic("cannot allocate irqstack for cpu %d", cpu); 142 142 } 143 143 144 - asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 145 144 146 145 pda->irqstackptr += IRQSTACKSIZE-64; 147 146 } ··· 196 197 /* CPU 0 is initialised in head64.c */ 197 198 if (cpu != 0) { 198 199 pda_init(cpu); 200 + zap_low_mappings(cpu); 199 201 } else 200 202 estacks = boot_exception_stacks; 201 203

+17

arch/x86_64/kernel/signal.c

··· 110 110 COPY(r14); 111 111 COPY(r15); 112 112 113 + /* Kernel saves and restores only the CS segment register on signals, 114 + * which is the bare minimum needed to allow mixed 32/64-bit code. 115 + * App's signal handler can save/restore other segments if needed. */ 116 + { 117 + unsigned cs; 118 + err |= __get_user(cs, &sc->cs); 119 + regs->cs = cs | 3; /* Force into user mode */ 120 + } 121 + 113 122 { 114 123 unsigned int tmpflags; 115 124 err |= __get_user(tmpflags, &sc->eflags); ··· 196 187 { 197 188 int err = 0; 198 189 190 + err |= __put_user(regs->cs, &sc->cs); 199 191 err |= __put_user(0, &sc->gs); 200 192 err |= __put_user(0, &sc->fs); 201 193 ··· 328 318 329 319 regs->rsp = (unsigned long)frame; 330 320 321 + /* Set up the CS register to run signal handlers in 64-bit mode, 322 + even if the handler happens to be interrupting 32-bit code. */ 323 + regs->cs = __USER_CS; 324 + 325 + /* This, by contrast, has nothing to do with segment registers - 326 + see include/asm-x86_64/uaccess.h for details. */ 331 327 set_fs(USER_DS); 328 + 332 329 regs->eflags &= ~TF_MASK; 333 330 if (test_thread_flag(TIF_SINGLESTEP)) 334 331 ptrace_notify(SIGTRAP);

+3 -4

arch/x86_64/kernel/smp.c

··· 28 28 #include <asm/proto.h> 29 29 #include <asm/apicdef.h> 30 30 31 - #define __cpuinit __init 32 - 33 31 /* 34 32 * Smarter SMP flushing macros. 35 33 * c/o Linus Torvalds. ··· 450 452 451 453 void smp_stop_cpu(void) 452 454 { 455 + unsigned long flags; 453 456 /* 454 457 * Remove this CPU: 455 458 */ 456 459 cpu_clear(smp_processor_id(), cpu_online_map); 457 - local_irq_disable(); 460 + local_irq_save(flags); 458 461 disable_local_APIC(); 459 - local_irq_enable(); 462 + local_irq_restore(flags); 460 463 } 461 464 462 465 static void smp_really_stop_cpu(void *dummy)

+91 -20

arch/x86_64/kernel/smpboot.c

··· 64 64 int smp_num_siblings = 1; 65 65 /* Package ID of each logical CPU */ 66 66 u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 67 + /* core ID of each logical CPU */ 67 68 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 68 69 69 70 /* Bitmask of currently online CPUs */ ··· 88 87 /* Set when the idlers are all forked */ 89 88 int smp_threads_ready; 90 89 90 + /* representing HT siblings of each logical CPU */ 91 91 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 92 + 93 + /* representing HT and core siblings of each logical CPU */ 92 94 cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 93 95 EXPORT_SYMBOL(cpu_core_map); 94 96 ··· 438 434 cpu_set(cpuid, cpu_callin_map); 439 435 } 440 436 437 + /* representing cpus for which sibling maps can be computed */ 438 + static cpumask_t cpu_sibling_setup_map; 439 + 441 440 static inline void set_cpu_sibling_map(int cpu) 442 441 { 443 442 int i; 443 + struct cpuinfo_x86 *c = cpu_data; 444 + 445 + cpu_set(cpu, cpu_sibling_setup_map); 444 446 445 447 if (smp_num_siblings > 1) { 446 - for_each_cpu(i) { 447 - if (cpu_core_id[cpu] == cpu_core_id[i]) { 448 + for_each_cpu_mask(i, cpu_sibling_setup_map) { 449 + if (phys_proc_id[cpu] == phys_proc_id[i] && 450 + cpu_core_id[cpu] == cpu_core_id[i]) { 448 451 cpu_set(i, cpu_sibling_map[cpu]); 449 452 cpu_set(cpu, cpu_sibling_map[i]); 453 + cpu_set(i, cpu_core_map[cpu]); 454 + cpu_set(cpu, cpu_core_map[i]); 450 455 } 451 456 } 452 457 } else { 453 458 cpu_set(cpu, cpu_sibling_map[cpu]); 454 459 } 455 460 456 - if (current_cpu_data.x86_num_cores > 1) { 457 - for_each_cpu(i) { 458 - if (phys_proc_id[cpu] == phys_proc_id[i]) { 459 - cpu_set(i, cpu_core_map[cpu]); 460 - cpu_set(cpu, cpu_core_map[i]); 461 - } 462 - } 463 - } else { 461 + if (current_cpu_data.x86_max_cores == 1) { 464 462 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 463 + c[cpu].booted_cores = 1; 464 + return; 465 + } 466 + 467 + for_each_cpu_mask(i, 
cpu_sibling_setup_map) { 468 + if (phys_proc_id[cpu] == phys_proc_id[i]) { 469 + cpu_set(i, cpu_core_map[cpu]); 470 + cpu_set(cpu, cpu_core_map[i]); 471 + /* 472 + * Does this new cpu bringup a new core? 473 + */ 474 + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { 475 + /* 476 + * for each core in package, increment 477 + * the booted_cores for this new cpu 478 + */ 479 + if (first_cpu(cpu_sibling_map[i]) == i) 480 + c[cpu].booted_cores++; 481 + /* 482 + * increment the core count for all 483 + * the other cpus in this package 484 + */ 485 + if (i != cpu) 486 + c[i].booted_cores++; 487 + } else if (i != cpu && !c[cpu].booted_cores) 488 + c[cpu].booted_cores = c[i].booted_cores; 489 + } 465 490 } 466 491 } 467 492 ··· 912 879 } 913 880 914 881 #ifdef CONFIG_HOTPLUG_CPU 882 + 883 + int additional_cpus __initdata = -1; 884 + 915 885 /* 916 886 * cpu_possible_map should be static, it cannot change as cpu's 917 887 * are onlined, or offlined. The reason is per-cpu data-structures ··· 923 887 * cpu_present_map on the other hand can change dynamically. 924 888 * In case when cpu_hotplug is not compiled, then we resort to current 925 889 * behaviour, which is cpu_possible == cpu_present. 926 - * If cpu-hotplug is supported, then we need to preallocate for all 927 - * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. 928 890 * - Ashok Raj 891 + * 892 + * Three ways to find out the number of additional hotplug CPUs: 893 + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 894 + * - otherwise use half of the available CPUs or 2, whatever is more. 895 + * - The user can overwrite it with additional_cpus=NUM 896 + * We do this because additional CPUs waste a lot of memory. 
897 + * -AK 929 898 */ 930 899 __init void prefill_possible_map(void) 931 900 { 932 901 int i; 933 - for (i = 0; i < NR_CPUS; i++) 902 + int possible; 903 + 904 + if (additional_cpus == -1) { 905 + if (disabled_cpus > 0) { 906 + additional_cpus = disabled_cpus; 907 + } else { 908 + additional_cpus = num_processors / 2; 909 + if (additional_cpus == 0) 910 + additional_cpus = 2; 911 + } 912 + } 913 + possible = num_processors + additional_cpus; 914 + if (possible > NR_CPUS) 915 + possible = NR_CPUS; 916 + 917 + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 918 + possible, 919 + max_t(int, possible - num_processors, 0)); 920 + 921 + for (i = 0; i < possible; i++) 934 922 cpu_set(i, cpu_possible_map); 935 923 } 936 924 #endif ··· 1025 965 nmi_watchdog_default(); 1026 966 current_cpu_data = boot_cpu_data; 1027 967 current_thread_info()->cpu = 0; /* needed? */ 968 + set_cpu_sibling_map(0); 1028 969 1029 970 if (smp_sanity_check(max_cpus) < 0) { 1030 971 printk(KERN_INFO "SMP disabled\n"); ··· 1069 1008 int me = smp_processor_id(); 1070 1009 cpu_set(me, cpu_online_map); 1071 1010 cpu_set(me, cpu_callout_map); 1072 - cpu_set(0, cpu_sibling_map[0]); 1073 - cpu_set(0, cpu_core_map[0]); 1074 1011 per_cpu(cpu_state, me) = CPU_ONLINE; 1075 1012 } 1076 1013 ··· 1121 1062 */ 1122 1063 void __init smp_cpus_done(unsigned int max_cpus) 1123 1064 { 1124 - #ifndef CONFIG_HOTPLUG_CPU 1125 - zap_low_mappings(); 1126 - #endif 1127 1065 smp_cleanup_boot(); 1128 1066 1129 1067 #ifdef CONFIG_X86_IO_APIC ··· 1137 1081 static void remove_siblinginfo(int cpu) 1138 1082 { 1139 1083 int sibling; 1084 + struct cpuinfo_x86 *c = cpu_data; 1140 1085 1086 + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { 1087 + cpu_clear(cpu, cpu_core_map[sibling]); 1088 + /* 1089 + * last thread sibling in this cpu core going down 1090 + */ 1091 + if (cpus_weight(cpu_sibling_map[cpu]) == 1) 1092 + c[sibling].booted_cores--; 1093 + } 1094 + 1141 1095 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 
1142 1096 cpu_clear(cpu, cpu_sibling_map[sibling]); 1143 - for_each_cpu_mask(sibling, cpu_core_map[cpu]) 1144 - cpu_clear(cpu, cpu_core_map[sibling]); 1145 1097 cpus_clear(cpu_sibling_map[cpu]); 1146 1098 cpus_clear(cpu_core_map[cpu]); 1147 1099 phys_proc_id[cpu] = BAD_APICID; 1148 1100 cpu_core_id[cpu] = BAD_APICID; 1101 + cpu_clear(cpu, cpu_sibling_setup_map); 1149 1102 } 1150 1103 1151 1104 void remove_cpu_from_maps(void) ··· 1217 1152 } 1218 1153 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1219 1154 } 1155 + 1156 + static __init int setup_additional_cpus(char *s) 1157 + { 1158 + return get_option(&s, &additional_cpus); 1159 + } 1160 + __setup("additional_cpus=", setup_additional_cpus); 1220 1161 1221 1162 #else /* ... !CONFIG_HOTPLUG_CPU */ 1222 1163

-14

arch/x86_64/kernel/sys_x86_64.c

··· 154 154 err |= copy_to_user(&name->machine, "i686", 5); 155 155 return err ? -EFAULT : 0; 156 156 } 157 - 158 - asmlinkage long sys_time64(long __user * tloc) 159 - { 160 - struct timeval now; 161 - int i; 162 - 163 - do_gettimeofday(&now); 164 - i = now.tv_sec; 165 - if (tloc) { 166 - if (put_user(i,tloc)) 167 - i = -EFAULT; 168 - } 169 - return i; 170 - }

+4 -40

arch/x86_64/kernel/traps.c

··· 428 428 { 429 429 conditional_sti(regs); 430 430 431 - #ifdef CONFIG_CHECKING 432 - { 433 - unsigned long gs; 434 - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 435 - rdmsrl(MSR_GS_BASE, gs); 436 - if (gs != (unsigned long)pda) { 437 - wrmsrl(MSR_GS_BASE, pda); 438 - printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, 439 - regs->rip); 440 - } 441 - } 442 - #endif 443 - 444 431 if (user_mode(regs)) { 445 432 struct task_struct *tsk = current; 446 433 ··· 499 512 long error_code) 500 513 { 501 514 conditional_sti(regs); 502 - 503 - #ifdef CONFIG_CHECKING 504 - { 505 - unsigned long gs; 506 - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 507 - rdmsrl(MSR_GS_BASE, gs); 508 - if (gs != (unsigned long)pda) { 509 - wrmsrl(MSR_GS_BASE, pda); 510 - oops_in_progress++; 511 - printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); 512 - oops_in_progress--; 513 - } 514 - } 515 - #endif 516 515 517 516 if (user_mode(regs)) { 518 517 struct task_struct *tsk = current; ··· 637 664 unsigned long condition; 638 665 struct task_struct *tsk = current; 639 666 siginfo_t info; 640 - 641 - #ifdef CONFIG_CHECKING 642 - { 643 - /* RED-PEN interaction with debugger - could destroy gs */ 644 - unsigned long gs; 645 - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); 646 - rdmsrl(MSR_GS_BASE, gs); 647 - if (gs != (unsigned long)pda) { 648 - wrmsrl(MSR_GS_BASE, pda); 649 - printk("debug handler: wrong gs %lx expected %p\n", gs, pda); 650 - } 651 - } 652 - #endif 653 667 654 668 get_debugreg(condition, 6); 655 669 ··· 845 885 } 846 886 847 887 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) 888 + { 889 + } 890 + 891 + asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) 848 892 { 849 893 } 850 894

+1 -1

arch/x86_64/kernel/vmlinux.lds.S

··· 50 50 *(.bss.page_aligned) 51 51 *(.bss) 52 52 } 53 - __bss_end = .; 53 + __bss_stop = .; 54 54 55 55 . = ALIGN(PAGE_SIZE); 56 56 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);

+3

arch/x86_64/kernel/x8664_ksyms.c

··· 203 203 #endif 204 204 205 205 EXPORT_SYMBOL(cpu_khz); 206 + 207 + EXPORT_SYMBOL(load_gs_index); 208 +

-38

arch/x86_64/lib/clear_page.S

··· 5 5 .globl clear_page 6 6 .p2align 4 7 7 clear_page: 8 - xorl %eax,%eax 9 - movl $4096/64,%ecx 10 - .p2align 4 11 - .Lloop: 12 - decl %ecx 13 - #define PUT(x) movq %rax,x*8(%rdi) 14 - movq %rax,(%rdi) 15 - PUT(1) 16 - PUT(2) 17 - PUT(3) 18 - PUT(4) 19 - PUT(5) 20 - PUT(6) 21 - PUT(7) 22 - leaq 64(%rdi),%rdi 23 - jnz .Lloop 24 - nop 25 - ret 26 - clear_page_end: 27 - 28 - /* C stepping K8 run faster using the string instructions. 29 - It is also a lot simpler. Use this when possible */ 30 - 31 - #include <asm/cpufeature.h> 32 - 33 - .section .altinstructions,"a" 34 - .align 8 35 - .quad clear_page 36 - .quad clear_page_c 37 - .byte X86_FEATURE_K8_C 38 - .byte clear_page_end-clear_page 39 - .byte clear_page_c_end-clear_page_c 40 - .previous 41 - 42 - .section .altinstr_replacement,"ax" 43 - clear_page_c: 44 8 movl $4096/8,%ecx 45 9 xorl %eax,%eax 46 10 rep 47 11 stosq 48 12 ret 49 - clear_page_c_end: 50 - .previous

-87

arch/x86_64/lib/copy_page.S

··· 8 8 .globl copy_page 9 9 .p2align 4 10 10 copy_page: 11 - subq $3*8,%rsp 12 - movq %rbx,(%rsp) 13 - movq %r12,1*8(%rsp) 14 - movq %r13,2*8(%rsp) 15 - 16 - movl $(4096/64)-5,%ecx 17 - .p2align 4 18 - .Loop64: 19 - dec %rcx 20 - 21 - movq (%rsi), %rax 22 - movq 8 (%rsi), %rbx 23 - movq 16 (%rsi), %rdx 24 - movq 24 (%rsi), %r8 25 - movq 32 (%rsi), %r9 26 - movq 40 (%rsi), %r10 27 - movq 48 (%rsi), %r11 28 - movq 56 (%rsi), %r12 29 - 30 - prefetcht0 5*64(%rsi) 31 - 32 - movq %rax, (%rdi) 33 - movq %rbx, 8 (%rdi) 34 - movq %rdx, 16 (%rdi) 35 - movq %r8, 24 (%rdi) 36 - movq %r9, 32 (%rdi) 37 - movq %r10, 40 (%rdi) 38 - movq %r11, 48 (%rdi) 39 - movq %r12, 56 (%rdi) 40 - 41 - leaq 64 (%rsi), %rsi 42 - leaq 64 (%rdi), %rdi 43 - 44 - jnz .Loop64 45 - 46 - movl $5,%ecx 47 - .p2align 4 48 - .Loop2: 49 - decl %ecx 50 - 51 - movq (%rsi), %rax 52 - movq 8 (%rsi), %rbx 53 - movq 16 (%rsi), %rdx 54 - movq 24 (%rsi), %r8 55 - movq 32 (%rsi), %r9 56 - movq 40 (%rsi), %r10 57 - movq 48 (%rsi), %r11 58 - movq 56 (%rsi), %r12 59 - 60 - movq %rax, (%rdi) 61 - movq %rbx, 8 (%rdi) 62 - movq %rdx, 16 (%rdi) 63 - movq %r8, 24 (%rdi) 64 - movq %r9, 32 (%rdi) 65 - movq %r10, 40 (%rdi) 66 - movq %r11, 48 (%rdi) 67 - movq %r12, 56 (%rdi) 68 - 69 - leaq 64(%rdi),%rdi 70 - leaq 64(%rsi),%rsi 71 - 72 - jnz .Loop2 73 - 74 - movq (%rsp),%rbx 75 - movq 1*8(%rsp),%r12 76 - movq 2*8(%rsp),%r13 77 - addq $3*8,%rsp 78 - ret 79 - 80 - /* C stepping K8 run faster using the string copy instructions. 81 - It is also a lot simpler. Use this when possible */ 82 - 83 - #include <asm/cpufeature.h> 84 - 85 - .section .altinstructions,"a" 86 - .align 8 87 - .quad copy_page 88 - .quad copy_page_c 89 - .byte X86_FEATURE_K8_C 90 - .byte copy_page_c_end-copy_page_c 91 - .byte copy_page_c_end-copy_page_c 92 - .previous 93 - 94 - .section .altinstr_replacement,"ax" 95 - copy_page_c: 96 11 movl $4096/8,%ecx 97 12 rep 98 13 movsq 99 14 ret 100 - copy_page_c_end: 101 - .previous

+2 -91

arch/x86_64/lib/memcpy.S

··· 11 11 * 12 12 * Output: 13 13 * rax original destination 14 + * 15 + * TODO: check best memcpy for PSC 14 16 */ 15 17 16 18 .globl __memcpy ··· 20 18 .p2align 4 21 19 __memcpy: 22 20 memcpy: 23 - pushq %rbx 24 - movq %rdi,%rax 25 - 26 - movl %edx,%ecx 27 - shrl $6,%ecx 28 - jz .Lhandle_tail 29 - 30 - .p2align 4 31 - .Lloop_64: 32 - decl %ecx 33 - 34 - movq (%rsi),%r11 35 - movq 8(%rsi),%r8 36 - 37 - movq %r11,(%rdi) 38 - movq %r8,1*8(%rdi) 39 - 40 - movq 2*8(%rsi),%r9 41 - movq 3*8(%rsi),%r10 42 - 43 - movq %r9,2*8(%rdi) 44 - movq %r10,3*8(%rdi) 45 - 46 - movq 4*8(%rsi),%r11 47 - movq 5*8(%rsi),%r8 48 - 49 - movq %r11,4*8(%rdi) 50 - movq %r8,5*8(%rdi) 51 - 52 - movq 6*8(%rsi),%r9 53 - movq 7*8(%rsi),%r10 54 - 55 - movq %r9,6*8(%rdi) 56 - movq %r10,7*8(%rdi) 57 - 58 - leaq 64(%rsi),%rsi 59 - leaq 64(%rdi),%rdi 60 - jnz .Lloop_64 61 - 62 - .Lhandle_tail: 63 - movl %edx,%ecx 64 - andl $63,%ecx 65 - shrl $3,%ecx 66 - jz .Lhandle_7 67 - .p2align 4 68 - .Lloop_8: 69 - decl %ecx 70 - movq (%rsi),%r8 71 - movq %r8,(%rdi) 72 - leaq 8(%rdi),%rdi 73 - leaq 8(%rsi),%rsi 74 - jnz .Lloop_8 75 - 76 - .Lhandle_7: 77 - movl %edx,%ecx 78 - andl $7,%ecx 79 - jz .Lende 80 - .p2align 4 81 - .Lloop_1: 82 - movb (%rsi),%r8b 83 - movb %r8b,(%rdi) 84 - incq %rdi 85 - incq %rsi 86 - decl %ecx 87 - jnz .Lloop_1 88 - 89 - .Lende: 90 - popq %rbx 91 - ret 92 - .Lfinal: 93 - 94 - /* C stepping K8 run faster using the string copy instructions. 95 - It is also a lot simpler. Use this when possible */ 96 - 97 - .section .altinstructions,"a" 98 - .align 8 99 - .quad memcpy 100 - .quad memcpy_c 101 - .byte X86_FEATURE_K8_C 102 - .byte .Lfinal-memcpy 103 - .byte memcpy_c_end-memcpy_c 104 - .previous 105 - 106 - .section .altinstr_replacement,"ax" 107 - /* rdi destination 108 - * rsi source 109 - * rdx count 110 - */ 111 - memcpy_c: 112 21 movq %rdi,%rax 113 22 movl %edx,%ecx 114 23 shrl $3,%ecx ··· 30 117 rep 31 118 movsb 32 119 ret 33 - memcpy_c_end: 34 - .previous

-94

arch/x86_64/lib/memset.S

··· 13 13 .p2align 4 14 14 memset: 15 15 __memset: 16 - movq %rdi,%r10 17 - movq %rdx,%r11 18 - 19 - /* expand byte value */ 20 - movzbl %sil,%ecx 21 - movabs $0x0101010101010101,%rax 22 - mul %rcx /* with rax, clobbers rdx */ 23 - 24 - /* align dst */ 25 - movl %edi,%r9d 26 - andl $7,%r9d 27 - jnz .Lbad_alignment 28 - .Lafter_bad_alignment: 29 - 30 - movl %r11d,%ecx 31 - shrl $6,%ecx 32 - jz .Lhandle_tail 33 - 34 - .p2align 4 35 - .Lloop_64: 36 - decl %ecx 37 - movq %rax,(%rdi) 38 - movq %rax,8(%rdi) 39 - movq %rax,16(%rdi) 40 - movq %rax,24(%rdi) 41 - movq %rax,32(%rdi) 42 - movq %rax,40(%rdi) 43 - movq %rax,48(%rdi) 44 - movq %rax,56(%rdi) 45 - leaq 64(%rdi),%rdi 46 - jnz .Lloop_64 47 - 48 - /* Handle tail in loops. The loops should be faster than hard 49 - to predict jump tables. */ 50 - .p2align 4 51 - .Lhandle_tail: 52 - movl %r11d,%ecx 53 - andl $63&(~7),%ecx 54 - jz .Lhandle_7 55 - shrl $3,%ecx 56 - .p2align 4 57 - .Lloop_8: 58 - decl %ecx 59 - movq %rax,(%rdi) 60 - leaq 8(%rdi),%rdi 61 - jnz .Lloop_8 62 - 63 - .Lhandle_7: 64 - movl %r11d,%ecx 65 - andl $7,%ecx 66 - jz .Lende 67 - .p2align 4 68 - .Lloop_1: 69 - decl %ecx 70 - movb %al,(%rdi) 71 - leaq 1(%rdi),%rdi 72 - jnz .Lloop_1 73 - 74 - .Lende: 75 - movq %r10,%rax 76 - ret 77 - 78 - .Lbad_alignment: 79 - cmpq $7,%r11 80 - jbe .Lhandle_7 81 - movq %rax,(%rdi) /* unaligned store */ 82 - movq $8,%r8 83 - subq %r9,%r8 84 - addq %r8,%rdi 85 - subq %r8,%r11 86 - jmp .Lafter_bad_alignment 87 - 88 - /* C stepping K8 run faster using the string instructions. 89 - It is also a lot simpler. 
Use this when possible */ 90 - 91 - #include <asm/cpufeature.h> 92 - 93 - .section .altinstructions,"a" 94 - .align 8 95 - .quad memset 96 - .quad memset_c 97 - .byte X86_FEATURE_K8_C 98 - .byte memset_c_end-memset_c 99 - .byte memset_c_end-memset_c 100 - .previous 101 - 102 - .section .altinstr_replacement,"ax" 103 - /* rdi destination 104 - * rsi value 105 - * rdx count 106 - */ 107 - memset_c: 108 16 movq %rdi,%r9 109 17 movl %edx,%r8d 110 18 andl $7,%r8d ··· 29 121 stosb 30 122 movq %r9,%rax 31 123 ret 32 - memset_c_end: 33 - .previous

+7 -12

arch/x86_64/mm/fault.c

··· 308 308 unsigned long flags; 309 309 siginfo_t info; 310 310 311 - #ifdef CONFIG_CHECKING 312 - { 313 - unsigned long gs; 314 - struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); 315 - rdmsrl(MSR_GS_BASE, gs); 316 - if (gs != (unsigned long)pda) { 317 - wrmsrl(MSR_GS_BASE, pda); 318 - printk("page_fault: wrong gs %lx expected %p\n", gs, pda); 319 - } 320 - } 321 - #endif 322 - 323 311 /* get the address */ 324 312 __asm__("movq %%cr2,%0":"=r" (address)); 325 313 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, ··· 559 571 force_sig_info(SIGBUS, &info, tsk); 560 572 return; 561 573 } 574 + 575 + static int __init enable_pagefaulttrace(char *str) 576 + { 577 + page_fault_trace = 1; 578 + return 0; 579 + } 580 + __setup("pagefaulttrace", enable_pagefaulttrace);

+80 -51

arch/x86_64/mm/init.c

··· 22 22 #include <linux/pagemap.h> 23 23 #include <linux/bootmem.h> 24 24 #include <linux/proc_fs.h> 25 + #include <linux/pci.h> 25 26 26 27 #include <asm/processor.h> 27 28 #include <asm/system.h> ··· 37 36 #include <asm/mmu_context.h> 38 37 #include <asm/proto.h> 39 38 #include <asm/smp.h> 39 + #include <asm/sections.h> 40 40 41 41 #ifndef Dprintk 42 42 #define Dprintk(x...) 43 43 #endif 44 44 45 - #ifdef CONFIG_GART_IOMMU 46 - extern int swiotlb; 47 - #endif 48 - 49 - extern char _stext[]; 45 + static unsigned long dma_reserve __initdata; 50 46 51 47 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 52 48 ··· 83 85 } 84 86 85 87 /* References to section boundaries */ 86 - 87 - extern char _text, _etext, _edata, __bss_start, _end[]; 88 - extern char __init_begin, __init_end; 89 88 90 89 int after_bootmem; 91 90 ··· 303 308 table_end<<PAGE_SHIFT); 304 309 } 305 310 306 - extern struct x8664_pda cpu_pda[NR_CPUS]; 307 - 308 - /* Assumes all CPUs still execute in init_mm */ 309 - void zap_low_mappings(void) 311 + void __cpuinit zap_low_mappings(int cpu) 310 312 { 311 - pgd_t *pgd = pgd_offset_k(0UL); 312 - pgd_clear(pgd); 313 - flush_tlb_all(); 313 + if (cpu == 0) { 314 + pgd_t *pgd = pgd_offset_k(0UL); 315 + pgd_clear(pgd); 316 + } else { 317 + /* 318 + * For AP's, zap the low identity mappings by changing the cr3 319 + * to init_level4_pgt and doing local flush tlb all 320 + */ 321 + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 322 + } 323 + __flush_tlb_all(); 324 + } 325 + 326 + /* Compute zone sizes for the DMA and DMA32 zones in a node. 
*/ 327 + __init void 328 + size_zones(unsigned long *z, unsigned long *h, 329 + unsigned long start_pfn, unsigned long end_pfn) 330 + { 331 + int i; 332 + unsigned long w; 333 + 334 + for (i = 0; i < MAX_NR_ZONES; i++) 335 + z[i] = 0; 336 + 337 + if (start_pfn < MAX_DMA_PFN) 338 + z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; 339 + if (start_pfn < MAX_DMA32_PFN) { 340 + unsigned long dma32_pfn = MAX_DMA32_PFN; 341 + if (dma32_pfn > end_pfn) 342 + dma32_pfn = end_pfn; 343 + z[ZONE_DMA32] = dma32_pfn - start_pfn; 344 + } 345 + z[ZONE_NORMAL] = end_pfn - start_pfn; 346 + 347 + /* Remove lower zones from higher ones. */ 348 + w = 0; 349 + for (i = 0; i < MAX_NR_ZONES; i++) { 350 + if (z[i]) 351 + z[i] -= w; 352 + w += z[i]; 353 + } 354 + 355 + /* Compute holes */ 356 + w = 0; 357 + for (i = 0; i < MAX_NR_ZONES; i++) { 358 + unsigned long s = w; 359 + w += z[i]; 360 + h[i] = e820_hole_size(s, w); 361 + } 362 + 363 + /* Add the space pace needed for mem_map to the holes too. */ 364 + for (i = 0; i < MAX_NR_ZONES; i++) 365 + h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; 366 + 367 + /* The 16MB DMA zone has the kernel and other misc mappings. 
368 + Account them too */ 369 + if (h[ZONE_DMA]) { 370 + h[ZONE_DMA] += dma_reserve; 371 + if (h[ZONE_DMA] >= z[ZONE_DMA]) { 372 + printk(KERN_WARNING 373 + "Kernel too large and filling up ZONE_DMA?\n"); 374 + h[ZONE_DMA] = z[ZONE_DMA]; 375 + } 376 + } 314 377 } 315 378 316 379 #ifndef CONFIG_NUMA 317 380 void __init paging_init(void) 318 381 { 319 - { 320 - unsigned long zones_size[MAX_NR_ZONES]; 321 - unsigned long holes[MAX_NR_ZONES]; 322 - unsigned int max_dma; 323 - 324 - memset(zones_size, 0, sizeof(zones_size)); 325 - memset(holes, 0, sizeof(holes)); 326 - 327 - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 328 - 329 - if (end_pfn < max_dma) { 330 - zones_size[ZONE_DMA] = end_pfn; 331 - holes[ZONE_DMA] = e820_hole_size(0, end_pfn); 332 - } else { 333 - zones_size[ZONE_DMA] = max_dma; 334 - holes[ZONE_DMA] = e820_hole_size(0, max_dma); 335 - zones_size[ZONE_NORMAL] = end_pfn - max_dma; 336 - holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn); 337 - } 338 - free_area_init_node(0, NODE_DATA(0), zones_size, 339 - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); 340 - } 341 - return; 382 + unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; 383 + size_zones(zones, holes, 0, end_pfn); 384 + free_area_init_node(0, NODE_DATA(0), zones, 385 + __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); 342 386 } 343 387 #endif 344 388 ··· 472 438 datasize >> 10, 473 439 initsize >> 10); 474 440 441 + #ifdef CONFIG_SMP 475 442 /* 476 - * Subtle. SMP is doing its boot stuff late (because it has to 477 - * fork idle threads) - but it also needs low mappings for the 478 - * protected-mode entry to work. We zap these entries only after 479 - * the WP-bit has been tested. 443 + * Sync boot_level4_pgt mappings with the init_level4_pgt 444 + * except for the low identity mappings which are already zapped 445 + * in init_level4_pgt. 
This sync-up is essential for AP's bringup 480 446 */ 481 - #ifndef CONFIG_SMP 482 - zap_low_mappings(); 447 + memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); 483 448 #endif 484 449 } 485 - 486 - extern char __initdata_begin[], __initdata_end[]; 487 450 488 451 void free_initmem(void) 489 452 { ··· 495 464 totalram_pages++; 496 465 } 497 466 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); 498 - printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); 467 + printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); 499 468 } 500 469 501 470 #ifdef CONFIG_BLK_DEV_INITRD ··· 522 491 #else 523 492 reserve_bootmem(phys, len); 524 493 #endif 494 + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) 495 + dma_reserve += len / PAGE_SIZE; 525 496 } 526 497 527 498 int kern_addr_valid(unsigned long addr) ··· 565 532 static ctl_table debug_table2[] = { 566 533 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, 567 534 proc_dointvec }, 568 - #ifdef CONFIG_CHECKING 569 - { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL, 570 - proc_dointvec }, 571 - #endif 572 535 { 0, } 573 536 }; 574 537

+1

arch/x86_64/mm/k8topology.c

··· 108 108 limit >>= 16; 109 109 limit <<= 24; 110 110 limit |= (1<<24)-1; 111 + limit++; 111 112 112 113 if (limit > end_pfn << PAGE_SHIFT) 113 114 limit = end_pfn << PAGE_SHIFT;

+75 -51

arch/x86_64/mm/numa.c

··· 38 38 39 39 int numa_off __initdata; 40 40 41 - int __init compute_hash_shift(struct node *nodes, int numnodes) 41 + 42 + /* 43 + * Given a shift value, try to populate memnodemap[] 44 + * Returns : 45 + * 1 if OK 46 + * 0 if memnodemap[] too small (or shift too small) 47 + * -1 if node overlap or lost ram (shift too big) 48 + */ 49 + static int __init populate_memnodemap( 50 + const struct node *nodes, int numnodes, int shift) 42 51 { 43 52 int i; 44 - int shift = 20; 45 - unsigned long addr,maxend=0; 46 - 47 - for (i = 0; i < numnodes; i++) 48 - if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) 49 - maxend = nodes[i].end; 53 + int res = -1; 54 + unsigned long addr, end; 50 55 51 - while ((1UL << shift) < (maxend / NODEMAPSIZE)) 56 + memset(memnodemap, 0xff, sizeof(memnodemap)); 57 + for (i = 0; i < numnodes; i++) { 58 + addr = nodes[i].start; 59 + end = nodes[i].end; 60 + if (addr >= end) 61 + continue; 62 + if ((end >> shift) >= NODEMAPSIZE) 63 + return 0; 64 + do { 65 + if (memnodemap[addr >> shift] != 0xff) 66 + return -1; 67 + memnodemap[addr >> shift] = i; 68 + addr += (1 << shift); 69 + } while (addr < end); 70 + res = 1; 71 + } 72 + return res; 73 + } 74 + 75 + int __init compute_hash_shift(struct node *nodes, int numnodes) 76 + { 77 + int shift = 20; 78 + 79 + while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) 52 80 shift++; 53 81 54 - printk (KERN_DEBUG"Using %d for the hash shift. 
Max adder is %lx \n", 55 - shift,maxend); 56 - memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); 57 - for (i = 0; i < numnodes; i++) { 58 - if (nodes[i].start == nodes[i].end) 59 - continue; 60 - for (addr = nodes[i].start; 61 - addr < nodes[i].end; 62 - addr += (1UL << shift)) { 63 - if (memnodemap[addr >> shift] != 0xff) { 64 - printk(KERN_INFO 82 + printk(KERN_DEBUG "Using %d for the hash shift.\n", 83 + shift); 84 + 85 + if (populate_memnodemap(nodes, numnodes, shift) != 1) { 86 + printk(KERN_INFO 65 87 "Your memory is not aligned you need to rebuild your kernel " 66 - "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", 67 - shift,addr); 68 - return -1; 69 - } 70 - memnodemap[addr >> shift] = i; 71 - } 72 - } 88 + "with a bigger NODEMAPSIZE shift=%d\n", 89 + shift); 90 + return -1; 91 + } 73 92 return shift; 74 93 } 75 94 ··· 113 94 start_pfn = start >> PAGE_SHIFT; 114 95 end_pfn = end >> PAGE_SHIFT; 115 96 116 - memory_present(nodeid, start_pfn, end_pfn); 117 97 nodedata_phys = find_e820_area(start, end, pgdat_size); 118 98 if (nodedata_phys == -1L) 119 99 panic("Cannot find memory pgdat in node %d\n", nodeid); ··· 150 132 unsigned long start_pfn, end_pfn; 151 133 unsigned long zones[MAX_NR_ZONES]; 152 134 unsigned long holes[MAX_NR_ZONES]; 153 - unsigned long dma_end_pfn; 154 135 155 - memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 156 - memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); 136 + start_pfn = node_start_pfn(nodeid); 137 + end_pfn = node_end_pfn(nodeid); 157 138 158 - start_pfn = node_start_pfn(nodeid); 159 - end_pfn = node_end_pfn(nodeid); 139 + Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", 140 + nodeid, start_pfn, end_pfn); 160 141 161 - Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); 162 - 163 - /* All nodes > 0 have a zero length zone DMA */ 164 - dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 165 - if (start_pfn < dma_end_pfn) { 166 - zones[ZONE_DMA] = dma_end_pfn - start_pfn; 167 - 
holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); 168 - zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 169 - holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); 170 - 171 - } else { 172 - zones[ZONE_NORMAL] = end_pfn - start_pfn; 173 - holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); 174 - } 175 - 142 + size_zones(zones, holes, start_pfn, end_pfn); 176 143 free_area_init_node(nodeid, NODE_DATA(nodeid), zones, 177 144 start_pfn, holes); 178 145 } ··· 174 171 for (i = 0; i < NR_CPUS; i++) { 175 172 if (cpu_to_node[i] != NUMA_NO_NODE) 176 173 continue; 177 - cpu_to_node[i] = rr; 174 + numa_set_node(i, rr); 178 175 rr = next_node(rr, node_online_map); 179 176 if (rr == MAX_NUMNODES) 180 177 rr = first_node(node_online_map); ··· 208 205 if (i == numa_fake-1) 209 206 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; 210 207 nodes[i].end = nodes[i].start + sz; 211 - if (i != numa_fake-1) 212 - nodes[i].end--; 213 208 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 214 209 i, 215 210 nodes[i].start, nodes[i].end, ··· 258 257 nodes_clear(node_online_map); 259 258 node_set_online(0); 260 259 for (i = 0; i < NR_CPUS; i++) 261 - cpu_to_node[i] = 0; 260 + numa_set_node(i, 0); 262 261 node_to_cpumask[0] = cpumask_of_cpu(0); 263 262 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 264 263 } ··· 267 266 { 268 267 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); 269 268 } 269 + 270 + void __cpuinit numa_set_node(int cpu, int node) 271 + { 272 + cpu_pda[cpu].nodenumber = node; 273 + cpu_to_node[cpu] = node; 274 + } 270 275 271 276 unsigned long __init numa_free_all_bootmem(void) 272 277 { ··· 284 277 return pages; 285 278 } 286 279 280 + #ifdef CONFIG_SPARSEMEM 281 + static void __init arch_sparse_init(void) 282 + { 283 + int i; 284 + 285 + for_each_online_node(i) 286 + memory_present(i, node_start_pfn(i), node_end_pfn(i)); 287 + 288 + sparse_init(); 289 + } 290 + #else 291 + #define arch_sparse_init() do {} while (0) 292 + #endif 
293 + 287 294 void __init paging_init(void) 288 295 { 289 296 int i; 297 + 298 + arch_sparse_init(); 299 + 290 300 for_each_online_node(i) { 291 301 setup_node_zones(i); 292 302 }

+1 -5

arch/x86_64/mm/srat.c

··· 71 71 nd->start = nd->end; 72 72 } 73 73 if (nd->end > end) { 74 - if (!(end & 0xfff)) 75 - end--; 76 74 nd->end = end; 77 75 if (nd->start > nd->end) 78 76 nd->start = nd->end; ··· 164 166 if (nd->end < end) 165 167 nd->end = end; 166 168 } 167 - if (!(nd->end & 0xfff)) 168 - nd->end--; 169 169 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, 170 170 nd->start, nd->end); 171 171 } ··· 199 203 if (cpu_to_node[i] == NUMA_NO_NODE) 200 204 continue; 201 205 if (!node_isset(cpu_to_node[i], nodes_parsed)) 202 - cpu_to_node[i] = NUMA_NO_NODE; 206 + numa_set_node(i, NUMA_NO_NODE); 203 207 } 204 208 numa_init_array(); 205 209 return 0;

+14 -3

drivers/char/agp/amd64-agp.c

··· 57 57 static struct pci_dev * hammers[MAX_HAMMER_GARTS]; 58 58 59 59 static struct resource *aperture_resource; 60 - static int __initdata agp_try_unsupported; 60 + static int __initdata agp_try_unsupported = 1; 61 61 62 - static int gart_iterator; 63 62 #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++) 64 63 65 64 static void flush_amd64_tlb(struct pci_dev *dev) ··· 72 73 73 74 static void amd64_tlbflush(struct agp_memory *temp) 74 75 { 76 + int gart_iterator; 75 77 for_each_nb() 76 78 flush_amd64_tlb(hammers[gart_iterator]); 77 79 } ··· 222 222 static int amd_8151_configure(void) 223 223 { 224 224 unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); 225 + int gart_iterator; 225 226 226 227 /* Configure AGP regs in each x86-64 host bridge. */ 227 228 for_each_nb() { ··· 236 235 static void amd64_cleanup(void) 237 236 { 238 237 u32 tmp; 239 - 238 + int gart_iterator; 240 239 for_each_nb() { 241 240 /* disable gart translation */ 242 241 pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); ··· 698 697 .subvendor = PCI_ANY_ID, 699 698 .subdevice = PCI_ANY_ID, 700 699 }, 700 + /* ALI/ULI M1695 */ 701 + { 702 + .class = (PCI_CLASS_BRIDGE_HOST << 8), 703 + .class_mask = ~0, 704 + .vendor = PCI_VENDOR_ID_AL, 705 + .device = 0x1689, 706 + .subvendor = PCI_ANY_ID, 707 + .subdevice = PCI_ANY_ID, 708 + }, 709 + 701 710 { } 702 711 }; 703 712

+1

include/asm-generic/sections.h

··· 13 13 extern char _end[]; 14 14 extern char __per_cpu_start[], __per_cpu_end[]; 15 15 extern char __kprobes_text_start[], __kprobes_text_end[]; 16 + extern char __initdata_begin[], __initdata_end[]; 16 17 17 18 #endif /* _ASM_GENERIC_SECTIONS_H_ */

+1 -1

include/asm-i386/mach-default/mach_reboot.h

··· 19 19 static inline void mach_reboot(void) 20 20 { 21 21 int i; 22 - for (i = 0; i < 100; i++) { 22 + for (i = 0; i < 10; i++) { 23 23 kb_wait(); 24 24 udelay(50); 25 25 outb(0x60, 0x64); /* write Controller Command Byte */

+3 -1

include/asm-i386/processor.h

··· 65 65 int f00f_bug; 66 66 int coma_bug; 67 67 unsigned long loops_per_jiffy; 68 - unsigned char x86_num_cores; 68 + unsigned char x86_max_cores; /* cpuid returned max cores value */ 69 + unsigned char booted_cores; /* number of cores as seen by OS */ 70 + unsigned char apicid; 69 71 } __attribute__((__aligned__(SMP_CACHE_BYTES))); 70 72 71 73 #define X86_VENDOR_INTEL 0

+2

include/asm-x86_64/apic.h

··· 111 111 112 112 extern int disable_timer_pin_1; 113 113 114 + extern void setup_threshold_lvt(unsigned long lvt_off); 115 + 114 116 #endif /* CONFIG_X86_LOCAL_APIC */ 115 117 116 118 extern unsigned boot_cpu_id;

+1 -1

include/asm-x86_64/cache.h

··· 9 9 /* L1 cache line size */ 10 10 #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) 11 11 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 12 - #define L1_CACHE_SHIFT_MAX 6 /* largest L1 which this arch supports */ 12 + #define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ 13 13 14 14 #endif

+3

include/asm-x86_64/desc.h

··· 98 98 99 99 static inline void set_intr_gate(int nr, void *func) 100 100 { 101 + BUG_ON((unsigned)nr > 0xFF); 101 102 _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 102 103 } 103 104 104 105 static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 105 106 { 107 + BUG_ON((unsigned)nr > 0xFF); 106 108 _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 107 109 } 108 110 109 111 static inline void set_system_gate(int nr, void *func) 110 112 { 113 + BUG_ON((unsigned)nr > 0xFF); 111 114 _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 112 115 } 113 116

+9 -2

include/asm-x86_64/dma.h

··· 72 72 73 73 #define MAX_DMA_CHANNELS 8 74 74 75 - /* The maximum address that we can perform a DMA transfer to on this platform */ 76 - #define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) 75 + 76 + /* 16MB ISA DMA zone */ 77 + #define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT) 78 + 79 + /* 4GB broken PCI/AGP hardware bus master zone */ 80 + #define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) 81 + 82 + /* Compat define for old dma zone */ 83 + #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) 77 84 78 85 /* 8237 DMA controllers */ 79 86 #define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */

+21 -14

include/asm-x86_64/hpet.h

··· 14 14 #define HPET_CFG 0x010 15 15 #define HPET_STATUS 0x020 16 16 #define HPET_COUNTER 0x0f0 17 - #define HPET_T0_CFG 0x100 18 - #define HPET_T0_CMP 0x108 19 - #define HPET_T0_ROUTE 0x110 20 - #define HPET_T1_CFG 0x120 21 - #define HPET_T1_CMP 0x128 22 - #define HPET_T1_ROUTE 0x130 23 - #define HPET_T2_CFG 0x140 24 - #define HPET_T2_CMP 0x148 25 - #define HPET_T2_ROUTE 0x150 17 + #define HPET_Tn_OFFSET 0x20 18 + #define HPET_Tn_CFG(n) (0x100 + (n) * HPET_Tn_OFFSET) 19 + #define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET) 20 + #define HPET_Tn_CMP(n) (0x108 + (n) * HPET_Tn_OFFSET) 21 + #define HPET_T0_CFG HPET_Tn_CFG(0) 22 + #define HPET_T0_CMP HPET_Tn_CMP(0) 23 + #define HPET_T1_CFG HPET_Tn_CFG(1) 24 + #define HPET_T1_CMP HPET_Tn_CMP(1) 26 25 27 26 #define HPET_ID_VENDOR 0xffff0000 28 27 #define HPET_ID_LEGSUP 0x00008000 28 + #define HPET_ID_64BIT 0x00002000 29 29 #define HPET_ID_NUMBER 0x00001f00 30 30 #define HPET_ID_REV 0x000000ff 31 31 #define HPET_ID_NUMBER_SHIFT 8 ··· 38 38 #define HPET_LEGACY_8254 2 39 39 #define HPET_LEGACY_RTC 8 40 40 41 - #define HPET_TN_ENABLE 0x004 42 - #define HPET_TN_PERIODIC 0x008 43 - #define HPET_TN_PERIODIC_CAP 0x010 44 - #define HPET_TN_SETVAL 0x040 45 - #define HPET_TN_32BIT 0x100 41 + #define HPET_TN_LEVEL 0x0002 42 + #define HPET_TN_ENABLE 0x0004 43 + #define HPET_TN_PERIODIC 0x0008 44 + #define HPET_TN_PERIODIC_CAP 0x0010 45 + #define HPET_TN_64BIT_CAP 0x0020 46 + #define HPET_TN_SETVAL 0x0040 47 + #define HPET_TN_32BIT 0x0100 48 + #define HPET_TN_ROUTE 0x3e00 49 + #define HPET_TN_FSB 0x4000 50 + #define HPET_TN_FSB_CAP 0x8000 51 + 52 + #define HPET_TN_ROUTE_SHIFT 9 46 53 47 54 extern int is_hpet_enabled(void); 48 55 extern int hpet_rtc_timer_init(void);

+1 -1

include/asm-x86_64/hw_irq.h

··· 55 55 #define CALL_FUNCTION_VECTOR 0xfc 56 56 #define KDB_VECTOR 0xfb /* reserved for KDB */ 57 57 #define THERMAL_APIC_VECTOR 0xfa 58 - /* 0xf9 free */ 58 + #define THRESHOLD_APIC_VECTOR 0xf9 59 59 #define INVALIDATE_TLB_VECTOR_END 0xf8 60 60 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */ 61 61

+5

include/asm-x86_64/ia32.h

··· 165 165 int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info); 166 166 int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info); 167 167 int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs); 168 + 169 + struct linux_binprm; 170 + extern int ia32_setup_arg_pages(struct linux_binprm *bprm, 171 + unsigned long stack_top, int exec_stack); 172 + 168 173 #endif 169 174 170 175 #endif /* !CONFIG_IA32_SUPPORT */

+10

include/asm-x86_64/mce.h

··· 67 67 /* Software defined banks */ 68 68 #define MCE_EXTENDED_BANK 128 69 69 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 70 + #define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ 71 + #define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 70 72 71 73 void mce_log(struct mce *m); 72 74 #ifdef CONFIG_X86_MCE_INTEL 73 75 void mce_intel_feature_init(struct cpuinfo_x86 *c); 74 76 #else 75 77 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) 78 + { 79 + } 80 + #endif 81 + 82 + #ifdef CONFIG_X86_MCE_AMD 83 + void mce_amd_feature_init(struct cpuinfo_x86 *c); 84 + #else 85 + static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) 76 86 { 77 87 } 78 88 #endif

+3 -6

include/asm-x86_64/mmzone.h

··· 17 17 /* Simple perfect hash to map physical addresses to node numbers */ 18 18 extern int memnode_shift; 19 19 extern u8 memnodemap[NODEMAPSIZE]; 20 - extern int maxnode; 21 20 22 21 extern struct pglist_data *node_data[]; 23 22 24 23 static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 25 24 { 26 - int nid; 25 + unsigned nid; 27 26 VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); 28 27 nid = memnodemap[addr >> memnode_shift]; 29 - VIRTUAL_BUG_ON(nid > maxnode); 28 + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 30 29 return nid; 31 30 } 32 31 ··· 40 41 #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) 41 42 #define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) 42 43 43 - /* AK: this currently doesn't deal with invalid addresses. We'll see 44 - if the 2.5 kernel doesn't pass them 45 - (2.4 used to). */ 44 + /* Requires pfn_valid(pfn) to be true */ 46 45 #define pfn_to_page(pfn) ({ \ 47 46 int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \ 48 47 ((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; \

+4 -3

include/asm-x86_64/mpspec.h

··· 16 16 /* 17 17 * A maximum of 255 APICs with the current APIC ID architecture. 18 18 */ 19 - #define MAX_APICS 128 19 + #define MAX_APICS 255 20 20 21 21 struct intel_mp_floating 22 22 { ··· 157 157 */ 158 158 159 159 #define MAX_MP_BUSSES 256 160 - #define MAX_IRQ_SOURCES 256 160 + /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ 161 + #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) 161 162 enum mp_bustype { 162 163 MP_BUS_ISA = 1, 163 164 MP_BUS_EISA, ··· 173 172 extern void find_smp_config (void); 174 173 extern void get_smp_config (void); 175 174 extern int nr_ioapics; 176 - extern int apic_version [MAX_APICS]; 175 + extern unsigned char apic_version [MAX_APICS]; 177 176 extern int mp_irq_entries; 178 177 extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; 179 178 extern int mpc_default_type;

+1 -1

include/asm-x86_64/msr.h

··· 19 19 : "=a" (a__), "=d" (b__) \ 20 20 : "c" (msr)); \ 21 21 val = a__ | (b__<<32); \ 22 - } while(0); 22 + } while(0) 23 23 24 24 #define wrmsr(msr,val1,val2) \ 25 25 __asm__ __volatile__("wrmsr" \

+2

include/asm-x86_64/numa.h

··· 17 17 extern void numa_init_array(void); 18 18 extern int numa_off; 19 19 20 + extern void numa_set_node(int cpu, int node); 21 + 20 22 extern unsigned char apicid_to_node[256]; 21 23 22 24 #define NUMA_NO_NODE 0xff

+1 -1

include/asm-x86_64/page.h

··· 11 11 #define PAGE_SIZE (1UL << PAGE_SHIFT) 12 12 #endif 13 13 #define PAGE_MASK (~(PAGE_SIZE-1)) 14 - #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT)) 14 + #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) 15 15 16 16 #define THREAD_ORDER 1 17 17 #ifdef __ASSEMBLY__

+1

include/asm-x86_64/pda.h

··· 15 15 int irqcount; /* Irq nesting counter. Starts with -1 */ 16 16 int cpunumber; /* Logical CPU number */ 17 17 char *irqstackptr; /* top of irqstack */ 18 + int nodenumber; /* number of current node */ 18 19 unsigned int __softirq_pending; 19 20 unsigned int __nmi_count; /* number of NMI on this CPUs */ 20 21 struct mm_struct *active_mm;

+3 -2

include/asm-x86_64/pgtable.h

··· 16 16 extern pud_t level3_ident_pgt[512]; 17 17 extern pmd_t level2_kernel_pgt[512]; 18 18 extern pgd_t init_level4_pgt[]; 19 + extern pgd_t boot_level4_pgt[]; 19 20 extern unsigned long __supported_pte_mask; 20 21 21 22 #define swapper_pg_dir init_level4_pgt ··· 248 247 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this 249 248 right? */ 250 249 #define pte_page(x) pfn_to_page(pte_pfn(x)) 251 - #define pte_pfn(x) ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) 250 + #define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) 252 251 253 252 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) 254 253 { ··· 355 354 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) 356 355 #define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) 357 356 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) 358 - #define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) 357 + #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) 359 358 360 359 #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) 361 360 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })

+3 -1

include/asm-x86_64/processor.h

··· 61 61 int x86_cache_alignment; 62 62 int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ 63 63 __u8 x86_virt_bits, x86_phys_bits; 64 - __u8 x86_num_cores; 64 + __u8 x86_max_cores; /* cpuid returned max cores value */ 65 65 __u32 x86_power; 66 66 __u32 extended_cpuid_level; /* Max extended CPUID function supported */ 67 67 unsigned long loops_per_jiffy; 68 + __u8 apicid; 69 + __u8 booted_cores; /* number of cores as seen by OS */ 68 70 } ____cacheline_aligned; 69 71 70 72 #define X86_VENDOR_INTEL 0

+4

include/asm-x86_64/proto.h

··· 11 11 extern void start_kernel(void); 12 12 extern void pda_init(int); 13 13 14 + extern void zap_low_mappings(int cpu); 15 + 14 16 extern void early_idt_handler(void); 15 17 16 18 extern void mcheck_init(struct cpuinfo_x86 *c); ··· 24 22 #define mtrr_bp_init() do {} while (0) 25 23 #endif 26 24 extern void init_memory_mapping(unsigned long start, unsigned long end); 25 + extern void size_zones(unsigned long *z, unsigned long *h, 26 + unsigned long start_pfn, unsigned long end_pfn); 27 27 28 28 extern void system_call(void); 29 29 extern int kernel_syscall(void);

-283

include/asm-x86_64/rwsem.h

··· 1 - /* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+ 2 - * 3 - * Written by David Howells (dhowells@redhat.com). 4 - * Ported by Andi Kleen <ak@suse.de> to x86-64. 5 - * 6 - * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h 7 - * 8 - * 9 - * The MSW of the count is the negated number of active writers and waiting 10 - * lockers, and the LSW is the total number of active locks 11 - * 12 - * The lock count is initialized to 0 (no active and no waiting lockers). 13 - * 14 - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an 15 - * uncontended lock. This can be determined because XADD returns the old value. 16 - * Readers increment by 1 and see a positive value when uncontended, negative 17 - * if there are writers (and maybe) readers waiting (in which case it goes to 18 - * sleep). 19 - * 20 - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can 21 - * be extended to 65534 by manually checking the whole MSW rather than relying 22 - * on the S flag. 23 - * 24 - * The value of ACTIVE_BIAS supports up to 65535 active processes. 25 - * 26 - * This should be totally fair - if anything is waiting, a process that wants a 27 - * lock will go to the back of the queue. When the currently active lock is 28 - * released, if there's a writer at the front of the queue, then that and only 29 - * that will be woken up; if there's a bunch of consecutive readers at the 30 - * front, then they'll all be woken up, but no other readers will be. 
31 - */ 32 - 33 - #ifndef _X8664_RWSEM_H 34 - #define _X8664_RWSEM_H 35 - 36 - #ifndef _LINUX_RWSEM_H 37 - #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" 38 - #endif 39 - 40 - #ifdef __KERNEL__ 41 - 42 - #include <linux/list.h> 43 - #include <linux/spinlock.h> 44 - 45 - struct rwsem_waiter; 46 - 47 - extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); 48 - extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); 49 - extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); 50 - extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); 51 - 52 - /* 53 - * the semaphore definition 54 - */ 55 - struct rw_semaphore { 56 - signed int count; 57 - #define RWSEM_UNLOCKED_VALUE 0x00000000 58 - #define RWSEM_ACTIVE_BIAS 0x00000001 59 - #define RWSEM_ACTIVE_MASK 0x0000ffff 60 - #define RWSEM_WAITING_BIAS (-0x00010000) 61 - #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 62 - #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 63 - spinlock_t wait_lock; 64 - struct list_head wait_list; 65 - #if RWSEM_DEBUG 66 - int debug; 67 - #endif 68 - }; 69 - 70 - /* 71 - * initialisation 72 - */ 73 - #if RWSEM_DEBUG 74 - #define __RWSEM_DEBUG_INIT , 0 75 - #else 76 - #define __RWSEM_DEBUG_INIT /* */ 77 - #endif 78 - 79 - #define __RWSEM_INITIALIZER(name) \ 80 - { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ 81 - __RWSEM_DEBUG_INIT } 82 - 83 - #define DECLARE_RWSEM(name) \ 84 - struct rw_semaphore name = __RWSEM_INITIALIZER(name) 85 - 86 - static inline void init_rwsem(struct rw_semaphore *sem) 87 - { 88 - sem->count = RWSEM_UNLOCKED_VALUE; 89 - spin_lock_init(&sem->wait_lock); 90 - INIT_LIST_HEAD(&sem->wait_list); 91 - #if RWSEM_DEBUG 92 - sem->debug = 0; 93 - #endif 94 - } 95 - 96 - /* 97 - * lock for reading 98 - */ 99 - static inline void __down_read(struct rw_semaphore *sem) 100 - { 101 - __asm__ __volatile__( 102 - "# beginning 
down_read\n\t" 103 - LOCK_PREFIX " incl (%%rdi)\n\t" /* adds 0x00000001, returns the old value */ 104 - " js 2f\n\t" /* jump if we weren't granted the lock */ 105 - "1:\n\t" 106 - LOCK_SECTION_START("") \ 107 - "2:\n\t" 108 - " call rwsem_down_read_failed_thunk\n\t" 109 - " jmp 1b\n" 110 - LOCK_SECTION_END \ 111 - "# ending down_read\n\t" 112 - : "+m"(sem->count) 113 - : "D"(sem) 114 - : "memory", "cc"); 115 - } 116 - 117 - 118 - /* 119 - * trylock for reading -- returns 1 if successful, 0 if contention 120 - */ 121 - static inline int __down_read_trylock(struct rw_semaphore *sem) 122 - { 123 - __s32 result, tmp; 124 - __asm__ __volatile__( 125 - "# beginning __down_read_trylock\n\t" 126 - " movl %0,%1\n\t" 127 - "1:\n\t" 128 - " movl %1,%2\n\t" 129 - " addl %3,%2\n\t" 130 - " jle 2f\n\t" 131 - LOCK_PREFIX " cmpxchgl %2,%0\n\t" 132 - " jnz 1b\n\t" 133 - "2:\n\t" 134 - "# ending __down_read_trylock\n\t" 135 - : "+m"(sem->count), "=&a"(result), "=&r"(tmp) 136 - : "i"(RWSEM_ACTIVE_READ_BIAS) 137 - : "memory", "cc"); 138 - return result>=0 ? 1 : 0; 139 - } 140 - 141 - 142 - /* 143 - * lock for writing 144 - */ 145 - static inline void __down_write(struct rw_semaphore *sem) 146 - { 147 - int tmp; 148 - 149 - tmp = RWSEM_ACTIVE_WRITE_BIAS; 150 - __asm__ __volatile__( 151 - "# beginning down_write\n\t" 152 - LOCK_PREFIX " xaddl %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */ 153 - " testl %0,%0\n\t" /* was the count 0 before? 
*/ 154 - " jnz 2f\n\t" /* jump if we weren't granted the lock */ 155 - "1:\n\t" 156 - LOCK_SECTION_START("") 157 - "2:\n\t" 158 - " call rwsem_down_write_failed_thunk\n\t" 159 - " jmp 1b\n" 160 - LOCK_SECTION_END 161 - "# ending down_write" 162 - : "=&r" (tmp) 163 - : "0"(tmp), "D"(sem) 164 - : "memory", "cc"); 165 - } 166 - 167 - /* 168 - * trylock for writing -- returns 1 if successful, 0 if contention 169 - */ 170 - static inline int __down_write_trylock(struct rw_semaphore *sem) 171 - { 172 - signed long ret = cmpxchg(&sem->count, 173 - RWSEM_UNLOCKED_VALUE, 174 - RWSEM_ACTIVE_WRITE_BIAS); 175 - if (ret == RWSEM_UNLOCKED_VALUE) 176 - return 1; 177 - return 0; 178 - } 179 - 180 - /* 181 - * unlock after reading 182 - */ 183 - static inline void __up_read(struct rw_semaphore *sem) 184 - { 185 - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; 186 - __asm__ __volatile__( 187 - "# beginning __up_read\n\t" 188 - LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */ 189 - " js 2f\n\t" /* jump if the lock is being waited upon */ 190 - "1:\n\t" 191 - LOCK_SECTION_START("") 192 - "2:\n\t" 193 - " decw %w[tmp]\n\t" /* do nothing if still outstanding active readers */ 194 - " jnz 1b\n\t" 195 - " call rwsem_wake_thunk\n\t" 196 - " jmp 1b\n" 197 - LOCK_SECTION_END 198 - "# ending __up_read\n" 199 - : "+m"(sem->count), [tmp] "+r" (tmp) 200 - : "D"(sem) 201 - : "memory", "cc"); 202 - } 203 - 204 - /* 205 - * unlock after writing 206 - */ 207 - static inline void __up_write(struct rw_semaphore *sem) 208 - { 209 - unsigned tmp; 210 - __asm__ __volatile__( 211 - "# beginning __up_write\n\t" 212 - " movl %[bias],%[tmp]\n\t" 213 - LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ 214 - " jnz 2f\n\t" /* jump if the lock is being waited upon */ 215 - "1:\n\t" 216 - LOCK_SECTION_START("") 217 - "2:\n\t" 218 - " decw %w[tmp]\n\t" /* did the active count reduce to 0? 
*/ 219 - " jnz 1b\n\t" /* jump back if not */ 220 - " call rwsem_wake_thunk\n\t" 221 - " jmp 1b\n" 222 - LOCK_SECTION_END 223 - "# ending __up_write\n" 224 - : "+m"(sem->count), [tmp] "=r" (tmp) 225 - : "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS) 226 - : "memory", "cc"); 227 - } 228 - 229 - /* 230 - * downgrade write lock to read lock 231 - */ 232 - static inline void __downgrade_write(struct rw_semaphore *sem) 233 - { 234 - __asm__ __volatile__( 235 - "# beginning __downgrade_write\n\t" 236 - LOCK_PREFIX " addl %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ 237 - " js 2f\n\t" /* jump if the lock is being waited upon */ 238 - "1:\n\t" 239 - LOCK_SECTION_START("") 240 - "2:\n\t" 241 - " call rwsem_downgrade_thunk\n" 242 - " jmp 1b\n" 243 - LOCK_SECTION_END 244 - "# ending __downgrade_write\n" 245 - : "=m"(sem->count) 246 - : "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count) 247 - : "memory", "cc"); 248 - } 249 - 250 - /* 251 - * implement atomic add functionality 252 - */ 253 - static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) 254 - { 255 - __asm__ __volatile__( 256 - LOCK_PREFIX "addl %1,%0" 257 - :"=m"(sem->count) 258 - :"ir"(delta), "m"(sem->count)); 259 - } 260 - 261 - /* 262 - * implement exchange and add functionality 263 - */ 264 - static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) 265 - { 266 - int tmp = delta; 267 - 268 - __asm__ __volatile__( 269 - LOCK_PREFIX "xaddl %0,(%2)" 270 - : "=r"(tmp), "=m"(sem->count) 271 - : "r"(sem), "m"(sem->count), "0" (tmp) 272 - : "memory"); 273 - 274 - return tmp+delta; 275 - } 276 - 277 - static inline int rwsem_is_locked(struct rw_semaphore *sem) 278 - { 279 - return (sem->count != 0); 280 - } 281 - 282 - #endif /* __KERNEL__ */ 283 - #endif /* _X8664_RWSEM_H */

+2 -1

include/asm-x86_64/smp.h

··· 47 47 extern void unlock_ipi_call_lock(void); 48 48 extern int smp_num_siblings; 49 49 extern void smp_send_reschedule(int cpu); 50 - extern void zap_low_mappings(void); 51 50 void smp_stop_cpu(void); 52 51 extern int smp_call_function_single(int cpuid, void (*func) (void *info), 53 52 void *info, int retry, int wait); ··· 81 82 extern int __cpu_disable(void); 82 83 extern void __cpu_die(unsigned int cpu); 83 84 extern void prefill_possible_map(void); 85 + extern unsigned num_processors; 86 + extern unsigned disabled_cpus; 84 87 85 88 #endif /* !ASSEMBLY */ 86 89

+6 -6

include/asm-x86_64/spinlock.h

··· 18 18 */ 19 19 20 20 #define __raw_spin_is_locked(x) \ 21 - (*(volatile signed char *)(&(x)->slock) <= 0) 21 + (*(volatile signed int *)(&(x)->slock) <= 0) 22 22 23 23 #define __raw_spin_lock_string \ 24 24 "\n1:\t" \ 25 - "lock ; decb %0\n\t" \ 25 + "lock ; decl %0\n\t" \ 26 26 "js 2f\n" \ 27 27 LOCK_SECTION_START("") \ 28 28 "2:\t" \ 29 29 "rep;nop\n\t" \ 30 - "cmpb $0,%0\n\t" \ 30 + "cmpl $0,%0\n\t" \ 31 31 "jle 2b\n\t" \ 32 32 "jmp 1b\n" \ 33 33 LOCK_SECTION_END 34 34 35 35 #define __raw_spin_unlock_string \ 36 - "movb $1,%0" \ 36 + "movl $1,%0" \ 37 37 :"=m" (lock->slock) : : "memory" 38 38 39 39 static inline void __raw_spin_lock(raw_spinlock_t *lock) ··· 47 47 48 48 static inline int __raw_spin_trylock(raw_spinlock_t *lock) 49 49 { 50 - char oldval; 50 + int oldval; 51 51 52 52 __asm__ __volatile__( 53 - "xchgb %b0,%1" 53 + "xchgl %0,%1" 54 54 :"=q" (oldval), "=m" (lock->slock) 55 55 :"0" (0) : "memory"); 56 56

+2

include/asm-x86_64/topology.h

··· 28 28 #define pcibus_to_node(bus) ((long)(bus->sysdata)) 29 29 #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); 30 30 31 + #define numa_node_id() read_pda(nodenumber) 32 + 31 33 /* sched_domains SD_NODE_INIT for x86_64 machines */ 32 34 #define SD_NODE_INIT (struct sched_domain) { \ 33 35 .span = CPU_MASK_NONE, \

+2 -1

include/asm-x86_64/unistd.h

··· 462 462 #define __NR_tkill 200 463 463 __SYSCALL(__NR_tkill, sys_tkill) 464 464 #define __NR_time 201 465 - __SYSCALL(__NR_time, sys_time64) 465 + __SYSCALL(__NR_time, sys_time) 466 466 #define __NR_futex 202 467 467 __SYSCALL(__NR_futex, sys_futex) 468 468 #define __NR_sched_setaffinity 203 ··· 608 608 #define __ARCH_WANT_SYS_SIGPENDING 609 609 #define __ARCH_WANT_SYS_SIGPROCMASK 610 610 #define __ARCH_WANT_SYS_RT_SIGACTION 611 + #define __ARCH_WANT_SYS_TIME 611 612 #define __ARCH_WANT_COMPAT_SYS_TIME 612 613 #endif 613 614

+10

include/linux/bitops.h

··· 84 84 return order; /* We could be slightly more clever with -1 here... */ 85 85 } 86 86 87 + static __inline__ int get_count_order(unsigned int count) 88 + { 89 + int order; 90 + 91 + order = fls(count) - 1; 92 + if (count & (count - 1)) 93 + order++; 94 + return order; 95 + } 96 + 87 97 /* 88 98 * hweightN: returns the hamming weight (i.e. the number 89 99 * of bits set) of a N-bit word

+11

include/linux/gfp.h

··· 14 14 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ 15 15 #define __GFP_DMA ((__force gfp_t)0x01u) 16 16 #define __GFP_HIGHMEM ((__force gfp_t)0x02u) 17 + #ifdef CONFIG_DMA_IS_DMA32 18 + #define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ 19 + #elif BITS_PER_LONG < 64 20 + #define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ 21 + #else 22 + #define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ 23 + #endif 17 24 18 25 /* 19 26 * Action modifiers - doesn't change the zoning ··· 69 62 platforms, used as appropriate on others */ 70 63 71 64 #define GFP_DMA __GFP_DMA 65 + 66 + /* 4GB DMA on some platforms */ 67 + #define GFP_DMA32 __GFP_DMA32 68 + 72 69 73 70 #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK)) 74 71

+2 -8

include/linux/mm.h

··· 206 206 struct mmu_gather; 207 207 struct inode; 208 208 209 - #ifdef ARCH_HAS_ATOMIC_UNSIGNED 210 - typedef unsigned page_flags_t; 211 - #else 212 - typedef unsigned long page_flags_t; 213 - #endif 214 - 215 209 /* 216 210 * Each physical page in the system has a struct page associated with 217 211 * it to keep track of whatever it is we are using the page for at the ··· 213 219 * a page. 214 220 */ 215 221 struct page { 216 - page_flags_t flags; /* Atomic flags, some possibly 222 + unsigned long flags; /* Atomic flags, some possibly 217 223 * updated asynchronously */ 218 224 atomic_t _count; /* Usage count, see below. */ 219 225 atomic_t _mapcount; /* Count of ptes mapped in mms, ··· 429 435 #endif 430 436 431 437 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ 432 - #define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH) 438 + #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) 433 439 #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) 434 440 #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) 435 441

+12 -8

include/linux/mmzone.h

··· 71 71 #endif 72 72 73 73 #define ZONE_DMA 0 74 - #define ZONE_NORMAL 1 75 - #define ZONE_HIGHMEM 2 74 + #define ZONE_DMA32 1 75 + #define ZONE_NORMAL 2 76 + #define ZONE_HIGHMEM 3 76 77 77 - #define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ 78 + #define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ 78 79 #define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ 79 80 80 81 ··· 109 108 110 109 /* 111 110 * On machines where it is needed (eg PCs) we divide physical memory 112 - * into multiple physical zones. On a PC we have 3 zones: 111 + * into multiple physical zones. On a PC we have 4 zones: 113 112 * 114 113 * ZONE_DMA < 16 MB ISA DMA capable memory 114 + * ZONE_DMA32 0 MB Empty 115 115 * ZONE_NORMAL 16-896 MB direct mapped by the kernel 116 116 * ZONE_HIGHMEM > 896 MB only page cache and user processes 117 117 */ ··· 435 433 436 434 #include <linux/topology.h> 437 435 /* Returns the number of the current Node. */ 436 + #ifndef numa_node_id 438 437 #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) 438 + #endif 439 439 440 440 #ifndef CONFIG_NEED_MULTIPLE_NODES 441 441 ··· 457 453 #include <asm/sparsemem.h> 458 454 #endif 459 455 460 - #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) 456 + #if BITS_PER_LONG == 32 461 457 /* 462 - * with 32 bit page->flags field, we reserve 8 bits for node/zone info. 463 - * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. 458 + * with 32 bit page->flags field, we reserve 9 bits for node/zone info. 459 + * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. 464 460 */ 465 - #define FLAGS_RESERVED 8 461 + #define FLAGS_RESERVED 9 466 462 467 463 #elif BITS_PER_LONG == 64 468 464 /*

+1 -1

mm/filemap.c

··· 134 134 struct address_space *mapping; 135 135 struct page *page; 136 136 137 - page = container_of((page_flags_t *)word, struct page, flags); 137 + page = container_of((unsigned long *)word, struct page, flags); 138 138 139 139 /* 140 140 * page_mapping() is being called without PG_locked held.

+14 -6

mm/page_alloc.c

··· 60 60 * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA 61 61 * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL 62 62 * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA 63 + * 64 + * TBD: should special case ZONE_DMA32 machines here - in those we normally 65 + * don't need any ZONE_NORMAL reservation 63 66 */ 64 - int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; 67 + int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; 65 68 66 69 EXPORT_SYMBOL(totalram_pages); 67 70 ··· 75 72 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; 76 73 EXPORT_SYMBOL(zone_table); 77 74 78 - static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; 75 + static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; 79 76 int min_free_kbytes = 1024; 80 77 81 78 unsigned long __initdata nr_kernel_pages; ··· 127 124 printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n", 128 125 function, current->comm, page); 129 126 printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", 130 - (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags, 127 + (int)(2*sizeof(unsigned long)), (unsigned long)page->flags, 131 128 page->mapping, page_mapcount(page), page_count(page)); 132 129 printk(KERN_EMERG "Backtrace:\n"); 133 130 dump_stack(); ··· 1424 1421 zone = pgdat->node_zones + ZONE_NORMAL; 1425 1422 if (zone->present_pages) 1426 1423 zonelist->zones[j++] = zone; 1424 + case ZONE_DMA32: 1425 + zone = pgdat->node_zones + ZONE_DMA32; 1426 + if (zone->present_pages) 1427 + zonelist->zones[j++] = zone; 1427 1428 case ZONE_DMA: 1428 1429 zone = pgdat->node_zones + ZONE_DMA; 1429 1430 if (zone->present_pages) ··· 1442 1435 int res = ZONE_NORMAL; 1443 1436 if (zone_bits & (__force int)__GFP_HIGHMEM) 1444 1437 res = ZONE_HIGHMEM; 1438 + if (zone_bits & (__force int)__GFP_DMA32) 1439 + res = ZONE_DMA32; 1445 1440 if (zone_bits & (__force 
int)__GFP_DMA) 1446 1441 res = ZONE_DMA; 1447 1442 return res; ··· 1855 1846 if (process_zones(cpu)) 1856 1847 ret = NOTIFY_BAD; 1857 1848 break; 1858 - #ifdef CONFIG_HOTPLUG_CPU 1849 + case CPU_UP_CANCELED: 1859 1850 case CPU_DEAD: 1860 1851 free_zone_pagesets(cpu); 1861 1852 break; 1862 - #endif 1863 1853 default: 1864 1854 break; 1865 1855 } ··· 1963 1955 if (zholes_size) 1964 1956 realsize -= zholes_size[j]; 1965 1957 1966 - if (j == ZONE_DMA || j == ZONE_NORMAL) 1958 + if (j < ZONE_HIGHMEM) 1967 1959 nr_kernel_pages += realsize; 1968 1960 nr_all_pages += realsize; 1969 1961