···7788 mce=off disable machine check99 mce=bootlog Enable logging of machine checks left over from booting.1010- Disabled by default because some BIOS leave bogus ones.1010+ Disabled by default on AMD because some BIOS leave bogus ones.1111 If your BIOS doesn't do that it's a good idea to enable though1212 to make sure you log even machine check events that result1313- in a reboot.1313+ in a reboot. On Intel systems it is enabled by default.1414+ mce=nobootlog1515+ Disable boot machine check logging.1416 mce=tolerancelevel (number)1517 0: always panic, 1: panic if deadlock possible,1618 2: try to avoid panic, 3: never panic or exit (for testing)···124122125123 cpumask=MASK only use cpus with bits set in mask126124125125+ additional_cpus=NUM Allow NUM more CPUs for hotplug126126+ (defaults are specified by the BIOS or half the available CPUs)127127+127128NUMA128129129130 numa=off Only set up a single NUMA node spanning all memory.···192187 Useful together with panic=30 to trigger a reboot.193188194189 kstack=N Print that many words from the kernel stack in oops dumps.190190+191191+ pagefaulttrace Dump all page faults. Only useful for extreme debugging192192+ and will create a lot of output.195193196194Misc197195
+5-1
Documentation/x86_64/mm.txt
···660000000000000000 - 00007fffffffffff (=47bits) user space, different per mm77hole caused by [48:63] sign extension88ffff800000000000 - ffff80ffffffffff (=40bits) guard hole99-ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of phys. memory99+ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory1010ffffc10000000000 - ffffc1ffffffffff (=40bits) hole1111ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space1212... unused hole ...1313ffffffff80000000 - ffffffff82800000 (=40MB) kernel text mapping, from phys 01414... unused hole ...1515ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space1616+1717+The direct mapping covers all memory in the system upto the highest1818+memory address (this means in some cases it can also include PCI memory1919+holes)16201721vmalloc space is lazily synchronized into the different PML4 pages of1822the processes using the page fault handler, with init_level4_pgt as
···7272/* Core ID of each logical CPU */7373int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};74747575+/* representing HT siblings of each logical CPU */7576cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;7677EXPORT_SYMBOL(cpu_sibling_map);77787979+/* representing HT and core siblings of each logical CPU */7880cpumask_t cpu_core_map[NR_CPUS] __read_mostly;7981EXPORT_SYMBOL(cpu_core_map);8082···444442445443static int cpucount;446444445445+/* representing cpus for which sibling maps can be computed */446446+static cpumask_t cpu_sibling_setup_map;447447+447448static inline void448449set_cpu_sibling_map(int cpu)449450{450451 int i;452452+ struct cpuinfo_x86 *c = cpu_data;453453+454454+ cpu_set(cpu, cpu_sibling_setup_map);451455452456 if (smp_num_siblings > 1) {453453- for (i = 0; i < NR_CPUS; i++) {454454- if (!cpu_isset(i, cpu_callout_map))455455- continue;456456- if (cpu_core_id[cpu] == cpu_core_id[i]) {457457+ for_each_cpu_mask(i, cpu_sibling_setup_map) {458458+ if (phys_proc_id[cpu] == phys_proc_id[i] &&459459+ cpu_core_id[cpu] == cpu_core_id[i]) {457460 cpu_set(i, cpu_sibling_map[cpu]);458461 cpu_set(cpu, cpu_sibling_map[i]);462462+ cpu_set(i, cpu_core_map[cpu]);463463+ cpu_set(cpu, cpu_core_map[i]);459464 }460465 }461466 } else {462467 cpu_set(cpu, cpu_sibling_map[cpu]);463468 }464469465465- if (current_cpu_data.x86_num_cores > 1) {466466- for (i = 0; i < NR_CPUS; i++) {467467- if (!cpu_isset(i, cpu_callout_map))468468- continue;469469- if (phys_proc_id[cpu] == phys_proc_id[i]) {470470- cpu_set(i, cpu_core_map[cpu]);471471- cpu_set(cpu, cpu_core_map[i]);472472- }473473- }474474- } else {470470+ if (current_cpu_data.x86_max_cores == 1) {475471 cpu_core_map[cpu] = cpu_sibling_map[cpu];472472+ c[cpu].booted_cores = 1;473473+ return;474474+ }475475+476476+ for_each_cpu_mask(i, cpu_sibling_setup_map) {477477+ if (phys_proc_id[cpu] == phys_proc_id[i]) {478478+ cpu_set(i, cpu_core_map[cpu]);479479+ cpu_set(cpu, cpu_core_map[i]);480480+ /*481481+ * Does this new cpu bringup a new core?482482+ */483483+ if (cpus_weight(cpu_sibling_map[cpu]) == 1) {484484+ /*485485+ * for each core in package, increment486486+ * the booted_cores for this new cpu487487+ */488488+ if (first_cpu(cpu_sibling_map[i]) == i)489489+ c[cpu].booted_cores++;490490+ /*491491+ * increment the core count for all492492+ * the other cpus in this package493493+ */494494+ if (i != cpu)495495+ c[i].booted_cores++;496496+ } else if (i != cpu && !c[cpu].booted_cores)497497+ c[cpu].booted_cores = c[i].booted_cores;498498+ }476499 }477500}478501···1122109511231096 current_thread_info()->cpu = 0;11241097 smp_tune_scheduling();11251125- cpus_clear(cpu_sibling_map[0]);11261126- cpu_set(0, cpu_sibling_map[0]);1127109811281128- cpus_clear(cpu_core_map[0]);11291129- cpu_set(0, cpu_core_map[0]);10991099+ set_cpu_sibling_map(0);1130110011311101 /*11321102 * If we couldn't find an SMP configuration at boot time,···13021278remove_siblinginfo(int cpu)13031279{13041280 int sibling;12811281+ struct cpuinfo_x86 *c = cpu_data;1305128212831283+ for_each_cpu_mask(sibling, cpu_core_map[cpu]) {12841284+ cpu_clear(cpu, cpu_core_map[sibling]);12851285+ /*12861286+ * last thread sibling in this cpu core going down12871287+ */12881288+ if (cpus_weight(cpu_sibling_map[cpu]) == 1)12891289+ c[sibling].booted_cores--;12901290+ }12911291+13061292 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])13071293 cpu_clear(cpu, cpu_sibling_map[sibling]);13081308- for_each_cpu_mask(sibling, cpu_core_map[cpu])13091309- cpu_clear(cpu, cpu_core_map[sibling]);13101294 cpus_clear(cpu_sibling_map[cpu]);13111295 cpus_clear(cpu_core_map[cpu]);13121296 phys_proc_id[cpu] = BAD_APICID;13131297 cpu_core_id[cpu] = BAD_APICID;12981298+ cpu_clear(cpu, cpu_sibling_setup_map);13141299}1315130013161301int __cpu_disable(void)
+2-2
arch/i386/kernel/srat.c
···137137 "enabled and removable" : "enabled" ) );138138}139139140140-#if MAX_NR_ZONES != 3141141-#error "MAX_NR_ZONES != 3, chunk_to_zone requires review"140140+#if MAX_NR_ZONES != 4141141+#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"142142#endif143143/* Take a chunk of pages from page frame cstart to cend and count the number144144 * of pages in each zone, returned via zones[].
···226226227227source "kernel/Kconfig.preempt"228228229229-config K8_NUMA230230- bool "K8 NUMA support"231231- select NUMA229229+config NUMA230230+ bool "Non Uniform Memory Access (NUMA) Support"232231 depends on SMP233232 help234234- Enable NUMA (Non Unified Memory Architecture) support for235235- AMD Opteron Multiprocessor systems. The kernel will try to allocate236236- memory used by a CPU on the local memory controller of the CPU237237- and add some more NUMA awareness to the kernel.238238- This code is recommended on all multiprocessor Opteron systems239239- and normally doesn't hurt on others.233233+ Enable NUMA (Non Uniform Memory Access) support. The kernel 234234+ will try to allocate memory used by a CPU on the local memory 235235+ controller of the CPU and add some more NUMA awareness to the kernel.236236+ This code is recommended on all multiprocessor Opteron systems.237237+ If the system is EM64T, you should say N unless your system is EM64T 238238+ NUMA. 239239+240240+config K8_NUMA241241+ bool "Old style AMD Opteron NUMA detection"242242+ depends on NUMA243243+ default y244244+ help245245+ Enable K8 NUMA node topology detection. You should say Y here if246246+ you have a multi processor AMD K8 system. This uses an old247247+ method to read the NUMA configurtion directly from the builtin248248+ Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA249249+ instead, which also takes priority if both are compiled in. 250250+251251+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.252252+253253+config X86_64_ACPI_NUMA254254+ bool "ACPI NUMA detection"255255+ depends on NUMA256256+ select ACPI 257257+ select ACPI_NUMA258258+ default y259259+ help260260+ Enable ACPI SRAT based node topology detection.240261241262config NUMA_EMU242242- bool "NUMA emulation support"243243- select NUMA244244- depends on SMP263263+ bool "NUMA emulation"264264+ depends on NUMA245265 help246266 Enable NUMA emulation. A flat machine will be split247267 into virtual nodes when booted with "numa=fake=N", where N is the···272252 depends on NUMA273253 default y274254275275-config NUMA276276- bool277277- default n278255279256config ARCH_DISCONTIGMEM_ENABLE280257 def_bool y···390373 help391374 Additional support for intel specific MCE features such as392375 the thermal monitor.376376+377377+config X86_MCE_AMD378378+ bool "AMD MCE features"379379+ depends on X86_MCE && X86_LOCAL_APIC380380+ default y381381+ help382382+ Additional support for AMD specific MCE features such as383383+ the DRAM Error Threshold.393384394385config PHYSICAL_START395386 hex "Physical address where the kernel is loaded" if EMBEDDED···527502 left.528503529504config IA32_AOUT530530- bool "IA32 a.out support"505505+ tristate "IA32 a.out support"531506 depends on IA32_EMULATION532507 help533508 Support old a.out binaries in the 32bit emulation.
-9
arch/x86_64/Kconfig.debug
···2233source "lib/Kconfig.debug"4455-# !SMP for now because the context switch early causes GPF in segment reloading66-# and the GS base checking does the wrong thing then, causing a hang.77-config CHECKING88- bool "Additional run-time checks"99- depends on DEBUG_KERNEL && !SMP1010- help1111- Enables some internal consistency checks for kernel debugging.1212- You should normally say N.1313-145config INIT_DEBUG156 bool "Debug __init statements"167 depends on DEBUG_KERNEL
+83-15
arch/x86_64/defconfig
···11#22# Automatically generated make config: don't edit33-# Linux kernel version: 2.6.13-git1144-# Mon Sep 12 16:16:16 200533+# Linux kernel version: 2.6.14-git744+# Sat Nov 5 15:55:50 200555#66CONFIG_X86_64=y77CONFIG_64BIT=y···3535# CONFIG_BSD_PROCESS_ACCT is not set3636CONFIG_SYSCTL=y3737# CONFIG_AUDIT is not set3838-# CONFIG_HOTPLUG is not set3838+CONFIG_HOTPLUG=y3939CONFIG_KOBJECT_UEVENT=y4040CONFIG_IKCONFIG=y4141CONFIG_IKCONFIG_PROC=y···9393# CONFIG_PREEMPT_VOLUNTARY is not set9494# CONFIG_PREEMPT is not set9595CONFIG_PREEMPT_BKL=y9696+CONFIG_NUMA=y9697CONFIG_K8_NUMA=y9898+CONFIG_X86_64_ACPI_NUMA=y9799# CONFIG_NUMA_EMU is not set98100CONFIG_ARCH_DISCONTIGMEM_ENABLE=y9999-CONFIG_NUMA=y100101CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y101102CONFIG_ARCH_SPARSEMEM_ENABLE=y102103CONFIG_SELECT_MEMORY_MODEL=y···108107CONFIG_FLAT_NODE_MEM_MAP=y109108CONFIG_NEED_MULTIPLE_NODES=y110109# CONFIG_SPARSEMEM_STATIC is not set110110+CONFIG_SPLIT_PTLOCK_CPUS=4111111CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y112112-CONFIG_HAVE_DEC_LOCK=y113112CONFIG_NR_CPUS=32113113+CONFIG_HOTPLUG_CPU=y114114CONFIG_HPET_TIMER=y115115CONFIG_X86_PM_TIMER=y116116CONFIG_HPET_EMULATE_RTC=y···119117CONFIG_SWIOTLB=y120118CONFIG_X86_MCE=y121119CONFIG_X86_MCE_INTEL=y120120+CONFIG_X86_MCE_AMD=y122121CONFIG_PHYSICAL_START=0x100000123122# CONFIG_KEXEC is not set124123CONFIG_SECCOMP=y···139136# CONFIG_PM_DEBUG is not set140137CONFIG_SOFTWARE_SUSPEND=y141138CONFIG_PM_STD_PARTITION=""139139+CONFIG_SUSPEND_SMP=y142140143141#144142# ACPI (Advanced Configuration and Power Interface) Support145143#146144CONFIG_ACPI=y145145+CONFIG_ACPI_SLEEP=y146146+CONFIG_ACPI_SLEEP_PROC_FS=y147147+CONFIG_ACPI_SLEEP_PROC_SLEEP=y147148CONFIG_ACPI_AC=y148149CONFIG_ACPI_BATTERY=y149150CONFIG_ACPI_BUTTON=y···155148CONFIG_ACPI_HOTKEY=m156149CONFIG_ACPI_FAN=y157150CONFIG_ACPI_PROCESSOR=y151151+CONFIG_ACPI_HOTPLUG_CPU=y158152CONFIG_ACPI_THERMAL=y159153CONFIG_ACPI_NUMA=y160154# CONFIG_ACPI_ASUS is not set···166158CONFIG_ACPI_EC=y167159CONFIG_ACPI_POWER=y168160CONFIG_ACPI_SYSTEM=y169169-# CONFIG_ACPI_CONTAINER is not set161161+CONFIG_ACPI_CONTAINER=y170162171163#172164# CPU Frequency scaling···301293# Network testing302294#303295# CONFIG_NET_PKTGEN is not set304304-# CONFIG_NETFILTER_NETLINK is not set305296# CONFIG_HAMRADIO is not set306297# CONFIG_IRDA is not set307298# CONFIG_BT is not set···317310CONFIG_PREVENT_FIRMWARE_BUILD=y318311# CONFIG_FW_LOADER is not set319312# CONFIG_DEBUG_DRIVER is not set313313+314314+#315315+# Connector - unified userspace <-> kernelspace linker316316+#317317+# CONFIG_CONNECTOR is not set320318321319#322320# Memory Technology Devices (MTD)···366354# CONFIG_IOSCHED_AS is not set367355CONFIG_IOSCHED_DEADLINE=y368356CONFIG_IOSCHED_CFQ=y357357+# CONFIG_DEFAULT_AS is not set358358+CONFIG_DEFAULT_DEADLINE=y359359+# CONFIG_DEFAULT_CFQ is not set360360+# CONFIG_DEFAULT_NOOP is not set361361+CONFIG_DEFAULT_IOSCHED="cfq"369362# CONFIG_ATA_OVER_ETH is not set370363371364#···467450CONFIG_SCSI_SPI_ATTRS=y468451# CONFIG_SCSI_FC_ATTRS is not set469452# CONFIG_SCSI_ISCSI_ATTRS is not set453453+# CONFIG_SCSI_SAS_ATTRS is not set470454471455#472456# SCSI low-level drivers···487469# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set488470# CONFIG_MEGARAID_NEWGEN is not set489471# CONFIG_MEGARAID_LEGACY is not set472472+# CONFIG_MEGARAID_SAS is not set490473CONFIG_SCSI_SATA=y491474# CONFIG_SCSI_SATA_AHCI is not set492475# CONFIG_SCSI_SATA_SVW is not set493476CONFIG_SCSI_ATA_PIIX=y494477# CONFIG_SCSI_SATA_MV is not set495495-# CONFIG_SCSI_SATA_NV is not set496496-# CONFIG_SCSI_SATA_PROMISE is not set478478+CONFIG_SCSI_SATA_NV=y479479+# CONFIG_SCSI_PDC_ADMA is not set497480# CONFIG_SCSI_SATA_QSTOR is not set481481+# CONFIG_SCSI_SATA_PROMISE is not set498482# CONFIG_SCSI_SATA_SX4 is not set499483# CONFIG_SCSI_SATA_SIL is not set484484+# CONFIG_SCSI_SATA_SIL24 is not set500485# CONFIG_SCSI_SATA_SIS is not set501486# CONFIG_SCSI_SATA_ULI is not set502487CONFIG_SCSI_SATA_VIA=y503488# CONFIG_SCSI_SATA_VITESSE is not set489489+CONFIG_SCSI_SATA_INTEL_COMBINED=y504490# CONFIG_SCSI_BUSLOGIC is not set505491# CONFIG_SCSI_DMX3191D is not set506492# CONFIG_SCSI_EATA is not set···547525CONFIG_FUSION=y548526CONFIG_FUSION_SPI=y549527# CONFIG_FUSION_FC is not set528528+# CONFIG_FUSION_SAS is not set550529CONFIG_FUSION_MAX_SGE=128551530# CONFIG_FUSION_CTL is not set552531···587564CONFIG_MII=y588565# CONFIG_HAPPYMEAL is not set589566# CONFIG_SUNGEM is not set567567+# CONFIG_CASSINI is not set590568CONFIG_NET_VENDOR_3COM=y591569CONFIG_VORTEX=y592570# CONFIG_TYPHOON is not set···764740#765741# Watchdog Cards766742#767767-# CONFIG_WATCHDOG is not set743743+CONFIG_WATCHDOG=y744744+# CONFIG_WATCHDOG_NOWAYOUT is not set745745+746746+#747747+# Watchdog Device Drivers748748+#749749+CONFIG_SOFT_WATCHDOG=y750750+# CONFIG_ACQUIRE_WDT is not set751751+# CONFIG_ADVANTECH_WDT is not set752752+# CONFIG_ALIM1535_WDT is not set753753+# CONFIG_ALIM7101_WDT is not set754754+# CONFIG_SC520_WDT is not set755755+# CONFIG_EUROTECH_WDT is not set756756+# CONFIG_IB700_WDT is not set757757+# CONFIG_IBMASR is not set758758+# CONFIG_WAFER_WDT is not set759759+# CONFIG_I6300ESB_WDT is not set760760+# CONFIG_I8XX_TCO is not set761761+# CONFIG_SC1200_WDT is not set762762+# CONFIG_60XX_WDT is not set763763+# CONFIG_SBC8360_WDT is not set764764+# CONFIG_CPU5_WDT is not set765765+# CONFIG_W83627HF_WDT is not set766766+# CONFIG_W83877F_WDT is not set767767+# CONFIG_W83977F_WDT is not set768768+# CONFIG_MACHZ_WDT is not set769769+770770+#771771+# PCI-based Watchdog Cards772772+#773773+# CONFIG_PCIPCWATCHDOG is not set774774+# CONFIG_WDTPCI is not set775775+776776+#777777+# USB-based Watchdog Cards778778+#779779+# CONFIG_USBPCWATCHDOG is not set768780CONFIG_HW_RANDOM=y769781# CONFIG_NVRAM is not set770782CONFIG_RTC=y···827767# TPM devices828768#829769# CONFIG_TCG_TPM is not set770770+# CONFIG_TELCLOCK is not set830771831772#832773# I2C support···844783#845784CONFIG_HWMON=y846785# CONFIG_HWMON_VID is not set786786+# CONFIG_SENSORS_HDAPS is not set847787# CONFIG_HWMON_DEBUG_CHIP is not set848788849789#···948886# USB Device Class drivers949887#950888# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set951951-# CONFIG_USB_BLUETOOTH_TTY is not set952889# CONFIG_USB_ACM is not set953890CONFIG_USB_PRINTER=y954891955892#956956-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information893893+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'894894+#895895+896896+#897897+# may also be needed; see USB_STORAGE Help for more information957898#958899CONFIG_USB_STORAGE=y959900# CONFIG_USB_STORAGE_DEBUG is not set···989924# CONFIG_USB_XPAD is not set990925# CONFIG_USB_ATI_REMOTE is not set991926# CONFIG_USB_KEYSPAN_REMOTE is not set927927+# CONFIG_USB_APPLETOUCH is not set992928993929#994930# USB Imaging devices···10711005#10721006# CONFIG_EDD is not set10731007# CONFIG_DELL_RBU is not set10741074-CONFIG_DCDBAS=m10081008+# CONFIG_DCDBAS is not set1075100910761010#10771011# File systems···11031037# CONFIG_QUOTA is not set11041038CONFIG_DNOTIFY=y11051039CONFIG_AUTOFS_FS=y11061106-# CONFIG_AUTOFS4_FS is not set10401040+CONFIG_AUTOFS4_FS=y11071041# CONFIG_FUSE_FS is not set1108104211091043#···11341068CONFIG_HUGETLBFS=y11351069CONFIG_HUGETLB_PAGE=y11361070CONFIG_RAMFS=y11371137-# CONFIG_RELAYFS_FS is not set10711071+CONFIG_RELAYFS_FS=y1138107211391073#11401074# Miscellaneous filesystems···12521186# CONFIG_DEBUG_KOBJECT is not set12531187# CONFIG_DEBUG_INFO is not set12541188CONFIG_DEBUG_FS=y11891189+# CONFIG_DEBUG_VM is not set12551190# CONFIG_FRAME_POINTER is not set11911191+# CONFIG_RCU_TORTURE_TEST is not set12561192CONFIG_INIT_DEBUG=y12571193# CONFIG_IOMMU_DEBUG is not set12581194CONFIG_KPROBES=y
-3
arch/x86_64/ia32/ia32_aout.c
···3636#undef WARN_OLD3737#undef CORE_DUMP /* probably broken */38383939-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,4040- unsigned long stack_top, int exec_stack);4141-4239static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);4340static int load_aout_library(struct file*);4441
+3-1
arch/x86_64/ia32/ia32_binfmt.c
···335335 me->thread.es = __USER_DS;336336}337337338338-int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)338338+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,339339+ int executable_stack)339340{340341 unsigned long stack_base;341342 struct vm_area_struct *mpnt;···390389391390 return 0;392391}392392+EXPORT_SYMBOL(ia32_setup_arg_pages);393393394394static unsigned long395395elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
···5757 * Rough estimation of how many shared IRQs there are, can5858 * be changed anytime.5959 */6060-#define MAX_PLUS_SHARED_IRQS NR_IRQS6060+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS6161#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)62626363/*···8585 int pin; \8686 struct irq_pin_list *entry = irq_2_pin + irq; \8787 \8888+ BUG_ON(irq >= NR_IRQS); \8889 for (;;) { \8990 unsigned int reg; \9091 pin = entry->pin; \···128127}129128#endif130129130130+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };131131+131132/*132133 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are133134 * shared ISA-space IRQs, so we have to support them. We are super···140137 static int first_free_entry = NR_IRQS;141138 struct irq_pin_list *entry = irq_2_pin + irq;142139140140+ BUG_ON(irq >= NR_IRQS);143141 while (entry->next)144142 entry = irq_2_pin + entry->next;145143···148144 entry->next = first_free_entry;149145 entry = irq_2_pin + entry->next;150146 if (++first_free_entry >= PIN_MAP_SIZE)151151- panic("io_apic.c: whoops");147147+ panic("io_apic.c: ran out of irq_2_pin entries!");152148 }153149 entry->apic = apic;154150 entry->pin = pin;···424420 best_guess = irq;425421 }426422 }423423+ BUG_ON(best_guess >= NR_IRQS);427424 return best_guess;428425}429426···615610 return MPBIOS_trigger(idx);616611}617612613613+static int next_irq = 16;614614+615615+/*616616+ * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ617617+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number618618+ * from ACPI, which can reach 800 in large boxen.619619+ *620620+ * Compact the sparse GSI space into a sequential IRQ series and reuse621621+ * vectors if possible.622622+ */623623+int gsi_irq_sharing(int gsi)624624+{625625+ int i, tries, vector;626626+627627+ BUG_ON(gsi >= NR_IRQ_VECTORS);628628+629629+ if (platform_legacy_irq(gsi))630630+ return gsi;631631+632632+ if (gsi_2_irq[gsi] != 0xFF)633633+ return (int)gsi_2_irq[gsi];634634+635635+ tries = NR_IRQS;636636+ try_again:637637+ vector = assign_irq_vector(gsi);638638+639639+ /*640640+ * Sharing vectors means sharing IRQs, so scan irq_vectors for previous641641+ * use of vector and if found, return that IRQ. However, we never want642642+ * to share legacy IRQs, which usually have a different trigger mode643643+ * than PCI.644644+ */645645+ for (i = 0; i < NR_IRQS; i++)646646+ if (IO_APIC_VECTOR(i) == vector)647647+ break;648648+ if (platform_legacy_irq(i)) {649649+ if (--tries >= 0) {650650+ IO_APIC_VECTOR(i) = 0;651651+ goto try_again;652652+ }653653+ panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);654654+ }655655+ if (i < NR_IRQS) {656656+ gsi_2_irq[gsi] = i;657657+ printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",658658+ gsi, vector, i);659659+ return i;660660+ }661661+662662+ i = next_irq++;663663+ BUG_ON(i >= NR_IRQS);664664+ gsi_2_irq[gsi] = i;665665+ IO_APIC_VECTOR(i) = vector;666666+ printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",667667+ gsi, vector, i);668668+ return i;669669+}670670+618671static int pin_2_irq(int idx, int apic, int pin)619672{620673 int irq, i;···702639 while (i < apic)703640 irq += nr_ioapic_registers[i++];704641 irq += pin;642642+ irq = gsi_irq_sharing(irq);705643 break;706644 }707645 default:···712648 break;713649 }714650 }651651+ BUG_ON(irq >= NR_IRQS);715652716653 /*717654 * PCI IRQ command line redirection. Yes, limits are hardcoded.···728663 }729664 }730665 }666666+ BUG_ON(irq >= NR_IRQS);731667 return irq;732668}733669···756690{757691 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;758692759759- BUG_ON(irq >= NR_IRQ_VECTORS);760760- if (IO_APIC_VECTOR(irq) > 0)693693+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);694694+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)761695 return IO_APIC_VECTOR(irq);762696next:763697 current_vector += 8;···765699 goto next;766700767701 if (current_vector >= FIRST_SYSTEM_VECTOR) {768768- offset++;769769- if (!(offset%8))770770- return -ENOSPC;702702+ /* If we run out of vectors on large boxen, must share them. */703703+ offset = (offset + 1) % 8;771704 current_vector = FIRST_DEVICE_VECTOR + offset;772705 }773706···19821917 entry.polarity = active_high_low;19831918 entry.mask = 1; /* Disabled (masked) */1984191919201920+ irq = gsi_irq_sharing(irq);19851921 /*19861922 * IRQs < 16 are already in the irq_2_pin[] map19871923 */
+12-5
arch/x86_64/kernel/mce.c
···3737static unsigned long console_logged;3838static int notify_user;3939static int rip_msr;4040-static int mce_bootlog;4040+static int mce_bootlog = 1;41414242/*4343 * Lockless MCE logging infrastructure.···347347 /* disable GART TBL walk error reporting, which trips off 348348 incorrectly with the IOMMU & 3ware & Cerberus. */349349 clear_bit(10, &bank[4]);350350+ /* Lots of broken BIOS around that don't clear them351351+ by default and leave crap in there. Don't log. */352352+ mce_bootlog = 0;350353 }354354+351355} 352356353357static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)···359355 switch (c->x86_vendor) {360356 case X86_VENDOR_INTEL:361357 mce_intel_feature_init(c);358358+ break;359359+ case X86_VENDOR_AMD:360360+ mce_amd_feature_init(c);362361 break;363362 default:364363 break;···502495/* mce=off disables machine check. Note you can reenable it later503496 using sysfs.504497 mce=TOLERANCELEVEL (number, see above)505505- mce=bootlog Log MCEs from before booting. Disabled by default to work506506- around buggy BIOS that leave bogus MCEs. */498498+ mce=bootlog Log MCEs from before booting. Disabled by default on AMD.499499+ mce=nobootlog Don't log MCEs from before booting. */507500static int __init mcheck_enable(char *str)508501{509502 if (*str == '=')510503 str++;511504 if (!strcmp(str, "off"))512505 mce_dont_init = 1;513513- else if (!strcmp(str, "bootlog"))514514- mce_bootlog = 1;506506+ else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))507507+ mce_bootlog = str[0] == 'b';515508 else if (isdigit(str[0]))516509 get_option(&str, &tolerant);517510 else
+538
arch/x86_64/kernel/mce_amd.c
···11+/*22+ * (c) 2005 Advanced Micro Devices, Inc.33+ * Your use of this code is subject to the terms and conditions of the44+ * GNU general public license version 2. See "COPYING" or55+ * http://www.gnu.org/licenses/gpl.html66+ *77+ * Written by Jacob Shin - AMD, Inc.88+ *99+ * Support : jacob.shin@amd.com1010+ *1111+ * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.1212+ * MC4_MISC0 exists per physical processor.1313+ *1414+ */1515+1616+#include <linux/cpu.h>1717+#include <linux/errno.h>1818+#include <linux/init.h>1919+#include <linux/interrupt.h>2020+#include <linux/kobject.h>2121+#include <linux/notifier.h>2222+#include <linux/sched.h>2323+#include <linux/smp.h>2424+#include <linux/sysdev.h>2525+#include <linux/sysfs.h>2626+#include <asm/apic.h>2727+#include <asm/mce.h>2828+#include <asm/msr.h>2929+#include <asm/percpu.h>3030+3131+#define PFX "mce_threshold: "3232+#define VERSION "version 1.00.9"3333+#define NR_BANKS 53434+#define THRESHOLD_MAX 0xFFF3535+#define INT_TYPE_APIC 0x000200003636+#define MASK_VALID_HI 0x800000003737+#define MASK_LVTOFF_HI 0x00F000003838+#define MASK_COUNT_EN_HI 0x000800003939+#define MASK_INT_TYPE_HI 0x000600004040+#define MASK_OVERFLOW_HI 0x000100004141+#define MASK_ERR_COUNT_HI 0x00000FFF4242+#define MASK_OVERFLOW 0x0001000000000000L4343+4444+struct threshold_bank {4545+ unsigned int cpu;4646+ u8 bank;4747+ u8 interrupt_enable;4848+ u16 threshold_limit;4949+ struct kobject kobj;5050+};5151+5252+static struct threshold_bank threshold_defaults = {5353+ .interrupt_enable = 0,5454+ .threshold_limit = THRESHOLD_MAX,5555+};5656+5757+#ifdef CONFIG_SMP5858+static unsigned char shared_bank[NR_BANKS] = {5959+ 0, 0, 0, 0, 16060+};6161+#endif6262+6363+static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */6464+6565+/*6666+ * CPU Initialization6767+ */6868+6969+/* must be called with correct cpu affinity */7070+static void threshold_restart_bank(struct threshold_bank *b,7171+ int reset, u16 old_limit)7272+{7373+ u32 mci_misc_hi, mci_misc_lo;7474+7575+ rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);7676+7777+ if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))7878+ reset = 1; /* limit cannot be lower than err count */7979+8080+ if (reset) { /* reset err count and overflow bit */8181+ mci_misc_hi =8282+ (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |8383+ (THRESHOLD_MAX - b->threshold_limit);8484+ } else if (old_limit) { /* change limit w/o reset */8585+ int new_count = (mci_misc_hi & THRESHOLD_MAX) +8686+ (old_limit - b->threshold_limit);8787+ mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |8888+ (new_count & THRESHOLD_MAX);8989+ }9090+9191+ b->interrupt_enable ?9292+ (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :9393+ (mci_misc_hi &= ~MASK_INT_TYPE_HI);9494+9595+ mci_misc_hi |= MASK_COUNT_EN_HI;9696+ wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);9797+}9898+9999+void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)100100+{101101+ int bank;102102+ u32 mci_misc_lo, mci_misc_hi;103103+ unsigned int cpu = smp_processor_id();104104+105105+ for (bank = 0; bank < NR_BANKS; ++bank) {106106+ rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);107107+108108+ /* !valid, !counter present, bios locked */109109+ if (!(mci_misc_hi & MASK_VALID_HI) ||110110+ !(mci_misc_hi & MASK_VALID_HI >> 1) ||111111+ (mci_misc_hi & MASK_VALID_HI >> 2))112112+ continue;113113+114114+ per_cpu(bank_map, cpu) |= (1 << bank);115115+116116+#ifdef CONFIG_SMP117117+ if (shared_bank[bank] && cpu_core_id[cpu])118118+ continue;119119+#endif120120+121121+ setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);122122+ threshold_defaults.cpu = cpu;123123+ threshold_defaults.bank = bank;124124+ threshold_restart_bank(&threshold_defaults, 0, 0);125125+ }126126+}127127+128128+/*129129+ * APIC Interrupt Handler130130+ */131131+132132+/*133133+ * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.134134+ * the interrupt goes off when error_count reaches threshold_limit.135135+ * the handler will simply log mcelog w/ software defined bank number.136136+ */137137+asmlinkage void mce_threshold_interrupt(void)138138+{139139+ int bank;140140+ struct mce m;141141+142142+ ack_APIC_irq();143143+ irq_enter();144144+145145+ memset(&m, 0, sizeof(m));146146+ rdtscll(m.tsc);147147+ m.cpu = smp_processor_id();148148+149149+ /* assume first bank caused it */150150+ for (bank = 0; bank < NR_BANKS; ++bank) {151151+ m.bank = MCE_THRESHOLD_BASE + bank;152152+ rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);153153+154154+ if (m.misc & MASK_OVERFLOW) {155155+ mce_log(&m);156156+ goto out;157157+ }158158+ }159159+ out:160160+ irq_exit();161161+}162162+163163+/*164164+ * Sysfs Interface165165+ */166166+167167+static struct sysdev_class threshold_sysclass = {168168+ set_kset_name("threshold"),169169+};170170+171171+static DEFINE_PER_CPU(struct sys_device, device_threshold);172172+173173+struct threshold_attr {174174+ struct attribute attr;175175+ ssize_t(*show) (struct threshold_bank *, char *);176176+ ssize_t(*store) (struct threshold_bank *, const char *, size_t count);177177+};178178+179179+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);180180+181181+static cpumask_t affinity_set(unsigned int cpu)182182+{183183+ cpumask_t oldmask = current->cpus_allowed;184184+ cpumask_t newmask = CPU_MASK_NONE;185185+ cpu_set(cpu, newmask);186186+ set_cpus_allowed(current, newmask);187187+ return oldmask;188188+}189189+190190+static void affinity_restore(cpumask_t oldmask)191191+{192192+ set_cpus_allowed(current, oldmask);193193+}194194+195195+#define SHOW_FIELDS(name) \196196+ static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \197197+ { \198198+ return sprintf(buf, "%lx\n", (unsigned long) b->name); \199199+ }200200+SHOW_FIELDS(interrupt_enable)201201+SHOW_FIELDS(threshold_limit)202202+203203+static ssize_t store_interrupt_enable(struct threshold_bank *b,204204+ const char *buf, size_t count)205205+{206206+ char *end;207207+ cpumask_t oldmask;208208+ unsigned long new = simple_strtoul(buf, &end, 0);209209+ if (end == buf)210210+ return -EINVAL;211211+ b->interrupt_enable = !!new;212212+213213+ oldmask = affinity_set(b->cpu);214214+ threshold_restart_bank(b, 0, 0);215215+ affinity_restore(oldmask);216216+217217+ return end - buf;218218+}219219+220220+static ssize_t store_threshold_limit(struct threshold_bank *b,221221+ const char *buf, size_t count)222222+{223223+ char *end;224224+ cpumask_t oldmask;225225+ u16 old;226226+ unsigned long new = simple_strtoul(buf, &end, 0);227227+ if (end == buf)228228+ return -EINVAL;229229+ if (new > THRESHOLD_MAX)230230+ new = THRESHOLD_MAX;231231+ if (new < 1)232232+ new = 1;233233+ old = b->threshold_limit;234234+ b->threshold_limit = new;235235+236236+ oldmask = affinity_set(b->cpu);237237+ threshold_restart_bank(b, 0, old);238238+ affinity_restore(oldmask);239239+240240+ return end - buf;241241+}242242+243243+static ssize_t show_error_count(struct threshold_bank *b, char *buf)244244+{245245+ u32 high, low;246246+ cpumask_t oldmask;247247+ oldmask = affinity_set(b->cpu);248248+ rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */249249+ affinity_restore(oldmask);250250+ return sprintf(buf, "%x\n",251251+ (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));252252+}253253+254254+static ssize_t store_error_count(struct threshold_bank *b,255255+ const char *buf, size_t count)256256+{257257+ cpumask_t oldmask;258258+ oldmask = affinity_set(b->cpu);259259+ threshold_restart_bank(b, 1, 0);260260+ affinity_restore(oldmask);261261+ return 1;262262+}263263+264264+#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \265265+ .attr = {.name = __stringify(_name), .mode = _mode }, \266266+ .show = _show, \267267+ .store = _store, \268268+};269269+270270+#define ATTR_FIELDS(name) \271271+ static struct threshold_attr name = \272272+ THRESHOLD_ATTR(name, 0644, show_## name, store_## name)273273+274274+ATTR_FIELDS(interrupt_enable);275275+ATTR_FIELDS(threshold_limit);276276+ATTR_FIELDS(error_count);277277+278278+static struct attribute *default_attrs[] = {279279+ &interrupt_enable.attr,280280+ &threshold_limit.attr,281281+ &error_count.attr,282282+ NULL283283+};284284+285285+#define to_bank(k) container_of(k,struct threshold_bank,kobj)286286+#define to_attr(a) container_of(a,struct threshold_attr,attr)287287+288288+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)289289+{290290+ struct threshold_bank *b = to_bank(kobj);291291+ struct threshold_attr *a = to_attr(attr);292292+ ssize_t ret;293293+ ret = a->show ? a->show(b, buf) : -EIO;294294+ return ret;295295+}296296+297297+static ssize_t store(struct kobject *kobj, struct attribute *attr,298298+ const char *buf, size_t count)299299+{300300+ struct threshold_bank *b = to_bank(kobj);301301+ struct threshold_attr *a = to_attr(attr);302302+ ssize_t ret;303303+ ret = a->store ? a->store(b, buf, count) : -EIO;304304+ return ret;305305+}306306+307307+static struct sysfs_ops threshold_ops = {308308+ .show = show,309309+ .store = store,310310+};311311+312312+static struct kobj_type threshold_ktype = {313313+ .sysfs_ops = &threshold_ops,314314+ .default_attrs = default_attrs,315315+};316316+317317+/* symlinks sibling shared banks to first core. first core owns dir/files. */318318+static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)319319+{320320+ int err = 0;321321+ struct threshold_bank *b = 0;322322+323323+#ifdef CONFIG_SMP324324+ if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */325325+ char name[16];326326+ unsigned lcpu = first_cpu(cpu_core_map[cpu]);327327+ if (cpu_core_id[lcpu])328328+ goto out; /* first core not up yet */329329+330330+ b = per_cpu(threshold_banks, lcpu)[bank];331331+ if (!b)332332+ goto out;333333+ sprintf(name, "bank%i", bank);334334+ err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,335335+ &b->kobj, name);336336+ if (err)337337+ goto out;338338+ per_cpu(threshold_banks, cpu)[bank] = b;339339+ goto out;340340+ }341341+#endif342342+343343+ b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);344344+ if (!b) {345345+ err = -ENOMEM;346346+ goto out;347347+ }348348+ memset(b, 0, sizeof(struct threshold_bank));349349+350350+ b->cpu = cpu;351351+ b->bank = bank;352352+ b->interrupt_enable = 0;353353+ b->threshold_limit = THRESHOLD_MAX;354354+ kobject_set_name(&b->kobj, "bank%i", bank);355355+ b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;356356+ b->kobj.ktype = &threshold_ktype;357357+358358+ err = kobject_register(&b->kobj);359359+ if (err) {360360+ kfree(b);361361+ goto out;362362+ }363363+ per_cpu(threshold_banks, cpu)[bank] = b;364364+ out:365365+ return err;366366+}367367+368368+/* create dir/files for all valid threshold banks */369369+static __cpuinit int threshold_create_device(unsigned int cpu)370370+{371371+ int bank;372372+ int err = 0;373373+374374+ per_cpu(device_threshold, cpu).id = cpu;375375+ per_cpu(device_threshold, cpu).cls = &threshold_sysclass;376376+ err = sysdev_register(&per_cpu(device_threshold, cpu));377377+ if (err)378378+ goto out;379379+380380+ for (bank = 0; bank < NR_BANKS; ++bank) {381381+ if (!(per_cpu(bank_map, cpu) & 1 << bank))382382+ continue;383383+ err = threshold_create_bank(cpu, bank);384384+ if (err)385385+ goto out;386386+ }387387+ out:388388+ return err;389389+}390390+391391+#ifdef CONFIG_HOTPLUG_CPU392392+/*393393+ * let's be hotplug friendly.394394+ * in case of multiple core processors, the first core always takes ownership395395+ * of shared sysfs dir/files, and rest of the cores will be symlinked to it.396396+ */397397+398398+/* cpu hotplug call removes all symlinks before first core dies */399399+static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)400400+{401401+ struct threshold_bank *b;402402+ char name[16];403403+404404+ b = per_cpu(threshold_banks, cpu)[bank];405405+ if (!b)406406+ return;407407+ if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {408408+ sprintf(name, "bank%i", bank);409409+ sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);410410+ per_cpu(threshold_banks, cpu)[bank] = 0;411411+ } else {412412+ kobject_unregister(&b->kobj);413413+ kfree(per_cpu(threshold_banks, cpu)[bank]);414414+ }415415+}416416+417417+static __cpuinit void threshold_remove_device(unsigned int cpu)418418+{419419+ int bank;420420+421421+ for (bank = 0; bank < NR_BANKS; ++bank) {422422+ if (!(per_cpu(bank_map, cpu) & 1 << bank))423423+ continue;424424+ threshold_remove_bank(cpu, bank);425425+ }426426+ sysdev_unregister(&per_cpu(device_threshold, cpu));427427+}428428+429429+/* link all existing siblings when first core comes up */430430+static __cpuinit int threshold_create_symlinks(unsigned int cpu)431431+{432432+ int bank, err = 0;433433+ unsigned int lcpu = 0;434434+435435+ if (cpu_core_id[cpu])436436+ return 0;437437+ for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {438438+ if (lcpu == cpu)439439+ continue;440440+ for (bank = 0; bank < NR_BANKS; ++bank) {441441+ if (!(per_cpu(bank_map, cpu) & 1 << bank))442442+ continue;443443+ if (!shared_bank[bank])444444+ continue;445445+ err = threshold_create_bank(lcpu, bank);446446+ }447447+ }448448+ return err;449449+}450450+451451+/* remove all symlinks before first core dies. */452452+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)453453+{454454+ int bank;455455+ unsigned int lcpu = 0;456456+ if (cpu_core_id[cpu])457457+ return;458458+ for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {459459+ if (lcpu == cpu)460460+ continue;461461+ for (bank = 0; bank < NR_BANKS; ++bank) {462462+ if (!(per_cpu(bank_map, cpu) & 1 << bank))463463+ continue;464464+ if (!shared_bank[bank])465465+ continue;466466+ threshold_remove_bank(lcpu, bank);467467+ }468468+ }469469+}470470+#else /* !CONFIG_HOTPLUG_CPU */471471+static __cpuinit void threshold_create_symlinks(unsigned int cpu)472472+{473473+}474474+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)475475+{476476+}477477+static void threshold_remove_device(unsigned int cpu)478478+{479479+}480480+#endif481481+482482+/* get notified when a cpu comes on/off */483483+static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,484484+ unsigned long action, void *hcpu)485485+{486486+ /* cpu was unsigned int to begin with */487487+ unsigned int cpu = (unsigned long)hcpu;488488+489489+ if (cpu >= NR_CPUS)490490+ goto out;491491+492492+ switch (action) {493493+ case CPU_ONLINE:494494+ threshold_create_device(cpu);495495+ threshold_create_symlinks(cpu);496496+ break;497497+ case CPU_DOWN_PREPARE:498498+ threshold_remove_symlinks(cpu);499499+ break;500500+ case CPU_DOWN_FAILED:501501+ threshold_create_symlinks(cpu);502502+ break;503503+ case CPU_DEAD:504504+ threshold_remove_device(cpu);505505+ break;506506+ default:507507+ break;508508+ }509509+ out:510510+ return NOTIFY_OK;511511+}512512+513513+static struct notifier_block threshold_cpu_notifier = {514514+ .notifier_call = threshold_cpu_callback,515515+};516516+517517+static __init int threshold_init_device(void)518518+{519519+ int err;520520+ int lcpu = 0;521521+522522+ err = sysdev_class_register(&threshold_sysclass);523523+ if (err)524524+ goto out;525525+526526+ /* to hit CPUs online before the notifier is up */527527+ for_each_online_cpu(lcpu) {528528+ err = threshold_create_device(lcpu);529529+ if (err)530530+ goto out;531531+ }532532+ register_cpu_notifier(&threshold_cpu_notifier);533533+534534+ out:535535+ return err;536536+}537537+538538+device_initcall(threshold_init_device);
+15-8
arch/x86_64/kernel/mpparse.c
···4242 * Various Linux-internal data structures created from the4343 * MP-table.4444 */4545-int apic_version [MAX_APICS];4545+unsigned char apic_version [MAX_APICS];4646unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };4747int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };4848···6565/* Processor that is doing the boot up */6666unsigned int boot_cpu_id = -1U;6767/* Internal processor count */6868-static unsigned int num_processors = 0;6868+unsigned int num_processors __initdata = 0;6969+7070+unsigned disabled_cpus __initdata;69717072/* Bitmask of physically existing CPUs */7173physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;···108106109107static void __init MP_processor_info (struct mpc_config_processor *m)110108{111111- int ver, cpu;109109+ int cpu;110110+ unsigned char ver;112111 static int found_bsp=0;113112114114- if (!(m->mpc_cpuflag & CPU_ENABLED))113113+ if (!(m->mpc_cpuflag & CPU_ENABLED)) {114114+ disabled_cpus++;115115 return;116116+ }116117117118 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",118119 m->mpc_apicid,···134129 }135130136131 cpu = num_processors++;137137-138138- if (m->mpc_apicid > MAX_APICS) {132132+133133+#if MAX_APICS < 255 134134+ if ((int)m->mpc_apicid > MAX_APICS) {139135 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",140136 m->mpc_apicid, MAX_APICS);141137 return;142138 }139139+#endif143140 ver = m->mpc_apicver;144141145142 physid_set(m->mpc_apicid, phys_cpu_present_map);···225218 m->mpc_irqtype, m->mpc_irqflag & 3,226219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,227220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);228228- if (++mp_irq_entries == MAX_IRQ_SOURCES)221221+ if (++mp_irq_entries >= MAX_IRQ_SOURCES)229222 panic("Max # of irq sources exceeded!!\n");230223}231224···556549 * Read the physical hardware table. Anything here will557550 * override the defaults.558551 */559559- if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {552552+ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {560553 smp_found_config = 0;561554 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");562555 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+7-1
arch/x86_64/kernel/pci-gart.c
···220220 uses the normal dma_mask for alloc_coherent. */221221 dma_mask &= *dev->dma_mask;222222223223+ /* Why <=? Even when the mask is smaller than 4GB it is often larger 224224+ than 16MB and in this case we have a chance of finding fitting memory 225225+ in the next higher zone first. If not retry with true GFP_DMA. -AK */226226+ if (dma_mask <= 0xffffffff)227227+ gfp |= GFP_DMA32;228228+223229 again:224230 memory = dma_alloc_pages(dev, gfp, get_order(size));225231 if (memory == NULL)···251245 }252246253247 if (!(gfp & GFP_DMA)) { 254254- gfp |= GFP_DMA; 248248+ gfp = (gfp & ~GFP_DMA32) | GFP_DMA;255249 goto again;256250 }257251 return NULL;
+27-20
arch/x86_64/kernel/process.c
···144144 do {145145 ssleep(1);146146 for_each_online_cpu(cpu) {147147- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))147147+ if (cpu_isset(cpu, map) &&148148+ !per_cpu(cpu_idle_state, cpu))148149 cpu_clear(cpu, map);149150 }150151 cpus_and(map, map, cpu_online_map);···276275 system_utsname.version);277276 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);278277 printk_address(regs->rip); 279279- printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);278278+ printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,279279+ regs->eflags);280280 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",281281 regs->rax, regs->rbx, regs->rcx);282282 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",···429427 struct pt_regs * childregs;430428 struct task_struct *me = current;431429432432- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;433433-430430+ childregs = ((struct pt_regs *)431431+ (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;434432 *childregs = *regs;435433436434 childregs->rax = 0;437435 childregs->rsp = rsp;438438- if (rsp == ~0UL) {436436+ if (rsp == ~0UL)439437 childregs->rsp = (unsigned long)childregs;440440- }441438442439 p->thread.rsp = (unsigned long) childregs;443440 p->thread.rsp0 = (unsigned long) (childregs+1);···458457 p->thread.io_bitmap_max = 0;459458 return -ENOMEM;460459 }461461- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);460460+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,461461+ IO_BITMAP_BYTES);462462 } 463463464464 /*···496494 * - fold all the options into a flag word and test it with a single test.497495 * - could test fs/gs bitsliced498496 */499499-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)497497+struct task_struct *498498+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)500499{501500 struct thread_struct *prev = &prev_p->thread,502501 *next = &next_p->thread;···568565 prev->userrsp = read_pda(oldrsp); 569566 write_pda(oldrsp, next->userrsp); 570567 write_pda(pcurrent, next_p); 571571- write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);568568+ write_pda(kernelstack,569569+ (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);572570573571 /*574572 * Now maybe reload the debug registers···650646 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);651647}652648653653-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)649649+asmlinkage long650650+sys_clone(unsigned long clone_flags, unsigned long newsp,651651+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)654652{655653 if (!newsp)656654 newsp = regs->rsp;···688682 return 0;689683 fp = *(u64 *)(p->thread.rsp);690684 do { 691691- if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)685685+ if (fp < (unsigned long)stack ||686686+ fp > (unsigned long)stack+THREAD_SIZE)692687 return 0; 693688 rip = *(u64 *)(fp+8); 694689 if (!in_sched_functions(rip))···724717 task->thread.gsindex = 0;725718 task->thread.gs = addr;726719 if (doit) {727727- load_gs_index(0);728728- ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 720720+ load_gs_index(0);721721+ ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);729722 } 730723 }731724 put_cpu();···742735 set_32bit_tls(task, FS_TLS, addr);743736 if (doit) { 744737 load_TLS(&task->thread, cpu); 745745- asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));738738+ asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));746739 }747740 task->thread.fsindex = FS_TLS_SEL;748741 task->thread.fs = 0;···752745 if (doit) {753746 /* set the selector to 0 to not confuse754747 __switch_to */755755- asm volatile("movl %0,%%fs" :: "r" (0));756756- ret = checking_wrmsrl(MSR_FS_BASE, addr); 748748+ asm volatile("movl %0,%%fs" :: "r" (0));749749+ ret = checking_wrmsrl(MSR_FS_BASE, addr);757750 }758751 }759752 put_cpu();···762755 unsigned long base; 763756 if (task->thread.fsindex == FS_TLS_SEL)764757 base = read_32bit_tls(task, FS_TLS);765765- else if (doit) {758758+ else if (doit)766759 rdmsrl(MSR_FS_BASE, base);767767- } else760760+ else768761 base = task->thread.fs;769762 ret = put_user(base, (unsigned long __user *)addr); 770763 break; ···773766 unsigned long base;774767 if (task->thread.gsindex == GS_TLS_SEL)775768 base = read_32bit_tls(task, GS_TLS);776776- else if (doit) {769769+ else if (doit)777770 rdmsrl(MSR_KERNEL_GS_BASE, base);778778- } else771771+ else779772 base = task->thread.gs;780773 ret = put_user(base, (unsigned long __user *)addr); 781774 break;
+4-3
arch/x86_64/kernel/reboot.c
···77777878void machine_shutdown(void)7979{8080+ unsigned long flags;8081 /* Stop the cpus and apics */8182#ifdef CONFIG_SMP8283 int reboot_cpu_id;···9998 smp_send_stop();10099#endif101100102102- local_irq_disable();101101+ local_irq_save(flags);103102104103#ifndef CONFIG_SMP105104 disable_local_APIC();···107106108107 disable_IO_APIC();109108110110- local_irq_enable();109109+ local_irq_restore(flags);111110}112111113112void machine_emergency_restart(void)···121120 /* Could also try the reset bit in the Hammer NB */122121 switch (reboot_type) { 123122 case BOOT_KBD:124124- for (i=0; i<100; i++) {123123+ for (i=0; i<10; i++) {125124 kb_wait();126125 udelay(50);127126 outb(0xfe,0x64); /* pulse reset low */
···141141 panic("cannot allocate irqstack for cpu %d", cpu); 142142 }143143144144- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));145144146145 pda->irqstackptr += IRQSTACKSIZE-64;147146} ···196197 /* CPU 0 is initialised in head64.c */197198 if (cpu != 0) {198199 pda_init(cpu);200200+ zap_low_mappings(cpu);199201 } else 200202 estacks = boot_exception_stacks; 201203
+17
arch/x86_64/kernel/signal.c
···110110 COPY(r14);111111 COPY(r15);112112113113+ /* Kernel saves and restores only the CS segment register on signals,114114+ * which is the bare minimum needed to allow mixed 32/64-bit code.115115+ * App's signal handler can save/restore other segments if needed. */116116+ {117117+ unsigned cs;118118+ err |= __get_user(cs, &sc->cs);119119+ regs->cs = cs | 3; /* Force into user mode */120120+ }121121+113122 {114123 unsigned int tmpflags;115124 err |= __get_user(tmpflags, &sc->eflags);···196187{197188 int err = 0;198189190190+ err |= __put_user(regs->cs, &sc->cs);199191 err |= __put_user(0, &sc->gs);200192 err |= __put_user(0, &sc->fs);201193···328318329319 regs->rsp = (unsigned long)frame;330320321321+ /* Set up the CS register to run signal handlers in 64-bit mode,322322+ even if the handler happens to be interrupting 32-bit code. */323323+ regs->cs = __USER_CS;324324+325325+ /* This, by contrast, has nothing to do with segment registers -326326+ see include/asm-x86_64/uaccess.h for details. */331327 set_fs(USER_DS);328328+332329 regs->eflags &= ~TF_MASK;333330 if (test_thread_flag(TIF_SINGLESTEP))334331 ptrace_notify(SIGTRAP);
···6464int smp_num_siblings = 1;6565/* Package ID of each logical CPU */6666u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };6767+/* core ID of each logical CPU */6768u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };68696970/* Bitmask of currently online CPUs */···8887/* Set when the idlers are all forked */8988int smp_threads_ready;90899090+/* representing HT siblings of each logical CPU */9191cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;9292+9393+/* representing HT and core siblings of each logical CPU */9294cpumask_t cpu_core_map[NR_CPUS] __read_mostly;9395EXPORT_SYMBOL(cpu_core_map);9496···438434 cpu_set(cpuid, cpu_callin_map);439435}440436437437+/* representing cpus for which sibling maps can be computed */438438+static cpumask_t cpu_sibling_setup_map;439439+441440static inline void set_cpu_sibling_map(int cpu)442441{443442 int i;443443+ struct cpuinfo_x86 *c = cpu_data;444444+445445+ cpu_set(cpu, cpu_sibling_setup_map);444446445447 if (smp_num_siblings > 1) {446446- for_each_cpu(i) {447447- if (cpu_core_id[cpu] == cpu_core_id[i]) {448448+ for_each_cpu_mask(i, cpu_sibling_setup_map) {449449+ if (phys_proc_id[cpu] == phys_proc_id[i] &&450450+ cpu_core_id[cpu] == cpu_core_id[i]) {448451 cpu_set(i, cpu_sibling_map[cpu]);449452 cpu_set(cpu, cpu_sibling_map[i]);453453+ cpu_set(i, cpu_core_map[cpu]);454454+ cpu_set(cpu, cpu_core_map[i]);450455 }451456 }452457 } else {453458 cpu_set(cpu, cpu_sibling_map[cpu]);454459 }455460456456- if (current_cpu_data.x86_num_cores > 1) {457457- for_each_cpu(i) {458458- if (phys_proc_id[cpu] == phys_proc_id[i]) {459459- cpu_set(i, cpu_core_map[cpu]);460460- cpu_set(cpu, cpu_core_map[i]);461461- }462462- }463463- } else {461461+ if (current_cpu_data.x86_max_cores == 1) {464462 cpu_core_map[cpu] = cpu_sibling_map[cpu];463463+ c[cpu].booted_cores = 1;464464+ return;465465+ }466466+467467+ for_each_cpu_mask(i, cpu_sibling_setup_map) {468468+ if (phys_proc_id[cpu] == phys_proc_id[i]) {469469+ cpu_set(i, cpu_core_map[cpu]);470470+ cpu_set(cpu, cpu_core_map[i]);471471+ /*472472+ * Does this new cpu bringup a new core?473473+ */474474+ if (cpus_weight(cpu_sibling_map[cpu]) == 1) {475475+ /*476476+ * for each core in package, increment477477+ * the booted_cores for this new cpu478478+ */479479+ if (first_cpu(cpu_sibling_map[i]) == i)480480+ c[cpu].booted_cores++;481481+ /*482482+ * increment the core count for all483483+ * the other cpus in this package484484+ */485485+ if (i != cpu)486486+ c[i].booted_cores++;487487+ } else if (i != cpu && !c[cpu].booted_cores)488488+ c[cpu].booted_cores = c[i].booted_cores;489489+ }465490 }466491}467492···912879}913880914881#ifdef CONFIG_HOTPLUG_CPU882882+883883+int additional_cpus __initdata = -1;884884+915885/*916886 * cpu_possible_map should be static, it cannot change as cpu's917887 * are onlined, or offlined. The reason is per-cpu data-structures···923887 * cpu_present_map on the other hand can change dynamically.924888 * In case when cpu_hotplug is not compiled, then we resort to current925889 * behaviour, which is cpu_possible == cpu_present.926926- * If cpu-hotplug is supported, then we need to preallocate for all927927- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.928890 * - Ashok Raj891891+ *892892+ * Three ways to find out the number of additional hotplug CPUs:893893+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.894894+ * - otherwise use half of the available CPUs or 2, whatever is more.895895+ * - The user can overwrite it with additional_cpus=NUM896896+ * We do this because additional CPUs waste a lot of memory.897897+ * -AK929898 */930899__init void prefill_possible_map(void)931900{932901 int i;933933- for (i = 0; i < NR_CPUS; i++)902902+ int possible;903903+904904+ if (additional_cpus == -1) {905905+ if (disabled_cpus > 0) {906906+ additional_cpus = disabled_cpus;907907+ } else {908908+ additional_cpus = num_processors / 2;909909+ if (additional_cpus == 0)910910+ additional_cpus = 2;911911+ }912912+ }913913+ possible = num_processors + additional_cpus;914914+ if (possible > NR_CPUS) 915915+ possible = NR_CPUS;916916+917917+ printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",918918+ possible,919919+ max_t(int, possible - num_processors, 0));920920+921921+ for (i = 0; i < possible; i++)934922 cpu_set(i, cpu_possible_map);935923}936924#endif···1025965 nmi_watchdog_default();1026966 current_cpu_data = boot_cpu_data;1027967 current_thread_info()->cpu = 0; /* needed? */968968+ set_cpu_sibling_map(0);10289691029970 if (smp_sanity_check(max_cpus) < 0) {1030971 printk(KERN_INFO "SMP disabled\n");···10691008 int me = smp_processor_id();10701009 cpu_set(me, cpu_online_map);10711010 cpu_set(me, cpu_callout_map);10721072- cpu_set(0, cpu_sibling_map[0]);10731073- cpu_set(0, cpu_core_map[0]);10741011 per_cpu(cpu_state, me) = CPU_ONLINE;10751012}10761013···11211062 */11221063void __init smp_cpus_done(unsigned int max_cpus)11231064{11241124-#ifndef CONFIG_HOTPLUG_CPU11251125- zap_low_mappings();11261126-#endif11271065 smp_cleanup_boot();1128106611291067#ifdef CONFIG_X86_IO_APIC···11371081static void remove_siblinginfo(int cpu)11381082{11391083 int sibling;10841084+ struct cpuinfo_x86 *c = cpu_data;1140108510861086+ for_each_cpu_mask(sibling, cpu_core_map[cpu]) {10871087+ cpu_clear(cpu, cpu_core_map[sibling]);10881088+ /*10891089+ * last thread sibling in this cpu core going down10901090+ */10911091+ if (cpus_weight(cpu_sibling_map[cpu]) == 1)10921092+ c[sibling].booted_cores--;10931093+ }10941094+11411095 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])11421096 cpu_clear(cpu, cpu_sibling_map[sibling]);11431143- for_each_cpu_mask(sibling, cpu_core_map[cpu])11441144- cpu_clear(cpu, cpu_core_map[sibling]);11451097 cpus_clear(cpu_sibling_map[cpu]);11461098 cpus_clear(cpu_core_map[cpu]);11471099 phys_proc_id[cpu] = BAD_APICID;11481100 cpu_core_id[cpu] = BAD_APICID;11011101+ cpu_clear(cpu, cpu_sibling_setup_map);11491102}1150110311511104void remove_cpu_from_maps(void)···12171152 }12181153 printk(KERN_ERR "CPU %u didn't die...\n", cpu);12191154}11551155+11561156+static __init int setup_additional_cpus(char *s)11571157+{11581158+ return get_option(&s, &additional_cpus);11591159+}11601160+__setup("additional_cpus=", setup_additional_cpus);1220116112211162#else /* ... !CONFIG_HOTPLUG_CPU */12221163
-14
arch/x86_64/kernel/sys_x86_64.c
···154154 err |= copy_to_user(&name->machine, "i686", 5); 155155 return err ? -EFAULT : 0;156156}157157-158158-asmlinkage long sys_time64(long __user * tloc)159159-{160160- struct timeval now; 161161- int i; 162162-163163- do_gettimeofday(&now);164164- i = now.tv_sec;165165- if (tloc) {166166- if (put_user(i,tloc))167167- i = -EFAULT;168168- }169169- return i;170170-}
···1919static inline void mach_reboot(void)2020{2121 int i;2222- for (i = 0; i < 100; i++) {2222+ for (i = 0; i < 10; i++) {2323 kb_wait();2424 udelay(50);2525 outb(0x60, 0x64); /* write Controller Command Byte */
+3-1
include/asm-i386/processor.h
···6565 int f00f_bug;6666 int coma_bug;6767 unsigned long loops_per_jiffy;6868- unsigned char x86_num_cores;6868+ unsigned char x86_max_cores; /* cpuid returned max cores value */6969+ unsigned char booted_cores; /* number of cores as seen by OS */7070+ unsigned char apicid;6971} __attribute__((__aligned__(SMP_CACHE_BYTES)));70727173#define X86_VENDOR_INTEL 0
+2
include/asm-x86_64/apic.h
···111111112112extern int disable_timer_pin_1;113113114114+extern void setup_threshold_lvt(unsigned long lvt_off);115115+114116#endif /* CONFIG_X86_LOCAL_APIC */115117116118extern unsigned boot_cpu_id;
+1-1
include/asm-x86_64/cache.h
···99/* L1 cache line size */1010#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)1111#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)1212-#define L1_CACHE_SHIFT_MAX 6 /* largest L1 which this arch supports */1212+#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */13131414#endif
···72727373#define MAX_DMA_CHANNELS 874747575-/* The maximum address that we can perform a DMA transfer to on this platform */7676-#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000)7575+7676+/* 16MB ISA DMA zone */7777+#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT)7878+7979+/* 4GB broken PCI/AGP hardware bus master zone */8080+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)8181+8282+/* Compat define for old dma zone */8383+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))77847885/* 8237 DMA controllers */7986#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */
···1717/* Simple perfect hash to map physical addresses to node numbers */1818extern int memnode_shift; 1919extern u8 memnodemap[NODEMAPSIZE]; 2020-extern int maxnode;21202221extern struct pglist_data *node_data[];23222423static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 2524{ 2626- int nid; 2525+ unsigned nid; 2726 VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);2827 nid = memnodemap[addr >> memnode_shift]; 2929- VIRTUAL_BUG_ON(nid > maxnode); 2828+ VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 3029 return nid; 3130} 3231···4041#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)4142#define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr))42434343-/* AK: this currently doesn't deal with invalid addresses. We'll see 4444- if the 2.5 kernel doesn't pass them4545- (2.4 used to). */4444+/* Requires pfn_valid(pfn) to be true */4645#define pfn_to_page(pfn) ({ \4746 int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \4847 ((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; \
+4-3
include/asm-x86_64/mpspec.h
···1616/*1717 * A maximum of 255 APICs with the current APIC ID architecture.1818 */1919-#define MAX_APICS 1281919+#define MAX_APICS 25520202121struct intel_mp_floating2222{···157157 */158158159159#define MAX_MP_BUSSES 256160160-#define MAX_IRQ_SOURCES 256160160+/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */161161+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)161162enum mp_bustype {162163 MP_BUS_ISA = 1,163164 MP_BUS_EISA,···173172extern void find_smp_config (void);174173extern void get_smp_config (void);175174extern int nr_ioapics;176176-extern int apic_version [MAX_APICS];175175+extern unsigned char apic_version [MAX_APICS];177176extern int mp_irq_entries;178177extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];179178extern int mpc_default_type;
···1515 int irqcount; /* Irq nesting counter. Starts with -1 */ 1616 int cpunumber; /* Logical CPU number */1717 char *irqstackptr; /* top of irqstack */1818+ int nodenumber; /* number of current node */1819 unsigned int __softirq_pending;1920 unsigned int __nmi_count; /* number of NMI on this CPUs */2021 struct mm_struct *active_mm;
···6161 int x86_cache_alignment;6262 int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/6363 __u8 x86_virt_bits, x86_phys_bits;6464- __u8 x86_num_cores;6464+ __u8 x86_max_cores; /* cpuid returned max cores value */6565 __u32 x86_power; 6666 __u32 extended_cpuid_level; /* Max extended CPUID function supported */6767 unsigned long loops_per_jiffy;6868+ __u8 apicid;6969+ __u8 booted_cores; /* number of cores as seen by OS */6870} ____cacheline_aligned;69717072#define X86_VENDOR_INTEL 0
+4
include/asm-x86_64/proto.h
···1111extern void start_kernel(void);1212extern void pda_init(int); 13131414+extern void zap_low_mappings(int cpu);1515+1416extern void early_idt_handler(void);15171618extern void mcheck_init(struct cpuinfo_x86 *c);···2422#define mtrr_bp_init() do {} while (0)2523#endif2624extern void init_memory_mapping(unsigned long start, unsigned long end);2525+extern void size_zones(unsigned long *z, unsigned long *h,2626+ unsigned long start_pfn, unsigned long end_pfn);27272828extern void system_call(void); 2929extern int kernel_syscall(void);
-283
include/asm-x86_64/rwsem.h
···11-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+22- *33- * Written by David Howells (dhowells@redhat.com).44- * Ported by Andi Kleen <ak@suse.de> to x86-64.55- *66- * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h77- *88- *99- * The MSW of the count is the negated number of active writers and waiting1010- * lockers, and the LSW is the total number of active locks1111- *1212- * The lock count is initialized to 0 (no active and no waiting lockers).1313- *1414- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an1515- * uncontended lock. This can be determined because XADD returns the old value.1616- * Readers increment by 1 and see a positive value when uncontended, negative1717- * if there are writers (and maybe) readers waiting (in which case it goes to1818- * sleep).1919- *2020- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can2121- * be extended to 65534 by manually checking the whole MSW rather than relying2222- * on the S flag.2323- *2424- * The value of ACTIVE_BIAS supports up to 65535 active processes.2525- *2626- * This should be totally fair - if anything is waiting, a process that wants a2727- * lock will go to the back of the queue. When the currently active lock is2828- * released, if there's a writer at the front of the queue, then that and only2929- * that will be woken up; if there's a bunch of consecutive readers at the3030- * front, then they'll all be woken up, but no other readers will be.3131- */3232-3333-#ifndef _X8664_RWSEM_H3434-#define _X8664_RWSEM_H3535-3636-#ifndef _LINUX_RWSEM_H3737-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"3838-#endif3939-4040-#ifdef __KERNEL__4141-4242-#include <linux/list.h>4343-#include <linux/spinlock.h>4444-4545-struct rwsem_waiter;4646-4747-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);4848-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);4949-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);5050-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);5151-5252-/*5353- * the semaphore definition5454- */5555-struct rw_semaphore {5656- signed int count;5757-#define RWSEM_UNLOCKED_VALUE 0x000000005858-#define RWSEM_ACTIVE_BIAS 0x000000015959-#define RWSEM_ACTIVE_MASK 0x0000ffff6060-#define RWSEM_WAITING_BIAS (-0x00010000)6161-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS6262-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)6363- spinlock_t wait_lock;6464- struct list_head wait_list;6565-#if RWSEM_DEBUG6666- int debug;6767-#endif6868-};6969-7070-/*7171- * initialisation7272- */7373-#if RWSEM_DEBUG7474-#define __RWSEM_DEBUG_INIT , 07575-#else7676-#define __RWSEM_DEBUG_INIT /* */7777-#endif7878-7979-#define __RWSEM_INITIALIZER(name) \8080-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \8181- __RWSEM_DEBUG_INIT }8282-8383-#define DECLARE_RWSEM(name) \8484- struct rw_semaphore name = __RWSEM_INITIALIZER(name)8585-8686-static inline void init_rwsem(struct rw_semaphore *sem)8787-{8888- sem->count = RWSEM_UNLOCKED_VALUE;8989- spin_lock_init(&sem->wait_lock);9090- INIT_LIST_HEAD(&sem->wait_list);9191-#if RWSEM_DEBUG9292- sem->debug = 0;9393-#endif9494-}9595-9696-/*9797- * lock for reading9898- */9999-static inline void __down_read(struct rw_semaphore *sem)100100-{101101- __asm__ __volatile__(102102- "# beginning down_read\n\t"103103-LOCK_PREFIX " incl (%%rdi)\n\t" /* adds 0x00000001, returns the old value */104104- " js 2f\n\t" /* jump if we weren't granted the lock */105105- "1:\n\t"106106- LOCK_SECTION_START("") \107107- "2:\n\t"108108- " call rwsem_down_read_failed_thunk\n\t"109109- " jmp 1b\n"110110- LOCK_SECTION_END \111111- "# ending down_read\n\t"112112- : "+m"(sem->count)113113- : "D"(sem)114114- : "memory", "cc");115115-}116116-117117-118118-/*119119- * trylock for reading -- returns 1 if successful, 0 if contention120120- */121121-static inline int __down_read_trylock(struct rw_semaphore *sem)122122-{123123- __s32 result, tmp;124124- __asm__ __volatile__(125125- "# beginning __down_read_trylock\n\t"126126- " movl %0,%1\n\t"127127- "1:\n\t"128128- " movl %1,%2\n\t"129129- " addl %3,%2\n\t"130130- " jle 2f\n\t"131131-LOCK_PREFIX " cmpxchgl %2,%0\n\t"132132- " jnz 1b\n\t"133133- "2:\n\t"134134- "# ending __down_read_trylock\n\t"135135- : "+m"(sem->count), "=&a"(result), "=&r"(tmp)136136- : "i"(RWSEM_ACTIVE_READ_BIAS)137137- : "memory", "cc");138138- return result>=0 ? 1 : 0;139139-}140140-141141-142142-/*143143- * lock for writing144144- */145145-static inline void __down_write(struct rw_semaphore *sem)146146-{147147- int tmp;148148-149149- tmp = RWSEM_ACTIVE_WRITE_BIAS;150150- __asm__ __volatile__(151151- "# beginning down_write\n\t"152152-LOCK_PREFIX " xaddl %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */153153- " testl %0,%0\n\t" /* was the count 0 before? */154154- " jnz 2f\n\t" /* jump if we weren't granted the lock */155155- "1:\n\t"156156- LOCK_SECTION_START("")157157- "2:\n\t"158158- " call rwsem_down_write_failed_thunk\n\t"159159- " jmp 1b\n"160160- LOCK_SECTION_END161161- "# ending down_write"162162- : "=&r" (tmp) 163163- : "0"(tmp), "D"(sem)164164- : "memory", "cc");165165-}166166-167167-/*168168- * trylock for writing -- returns 1 if successful, 0 if contention169169- */170170-static inline int __down_write_trylock(struct rw_semaphore *sem)171171-{172172- signed long ret = cmpxchg(&sem->count,173173- RWSEM_UNLOCKED_VALUE, 174174- RWSEM_ACTIVE_WRITE_BIAS);175175- if (ret == RWSEM_UNLOCKED_VALUE)176176- return 1;177177- return 0;178178-}179179-180180-/*181181- * unlock after reading182182- */183183-static inline void __up_read(struct rw_semaphore *sem)184184-{185185- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;186186- __asm__ __volatile__(187187- "# beginning __up_read\n\t"188188-LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */189189- " js 2f\n\t" /* jump if the lock is being waited upon */190190- "1:\n\t"191191- LOCK_SECTION_START("")192192- "2:\n\t"193193- " decw %w[tmp]\n\t" /* do nothing if still outstanding active readers */194194- " jnz 1b\n\t"195195- " call rwsem_wake_thunk\n\t"196196- " jmp 1b\n"197197- LOCK_SECTION_END198198- "# ending __up_read\n"199199- : "+m"(sem->count), [tmp] "+r" (tmp)200200- : "D"(sem)201201- : "memory", "cc");202202-}203203-204204-/*205205- * unlock after writing206206- */207207-static inline void __up_write(struct rw_semaphore *sem)208208-{209209- unsigned tmp; 210210- __asm__ __volatile__(211211- "# beginning __up_write\n\t"212212- " movl %[bias],%[tmp]\n\t"213213-LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */214214- " jnz 2f\n\t" /* jump if the lock is being waited upon */215215- "1:\n\t"216216- LOCK_SECTION_START("")217217- "2:\n\t"218218- " decw %w[tmp]\n\t" /* did the active count reduce to 0? */219219- " jnz 1b\n\t" /* jump back if not */220220- " call rwsem_wake_thunk\n\t"221221- " jmp 1b\n"222222- LOCK_SECTION_END223223- "# ending __up_write\n"224224- : "+m"(sem->count), [tmp] "=r" (tmp)225225- : "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS)226226- : "memory", "cc");227227-}228228-229229-/*230230- * downgrade write lock to read lock231231- */232232-static inline void __downgrade_write(struct rw_semaphore *sem)233233-{234234- __asm__ __volatile__(235235- "# beginning __downgrade_write\n\t"236236-LOCK_PREFIX " addl %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */237237- " js 2f\n\t" /* jump if the lock is being waited upon */238238- "1:\n\t"239239- LOCK_SECTION_START("")240240- "2:\n\t"241241- " call rwsem_downgrade_thunk\n"242242- " jmp 1b\n"243243- LOCK_SECTION_END244244- "# ending __downgrade_write\n"245245- : "=m"(sem->count)246246- : "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)247247- : "memory", "cc");248248-}249249-250250-/*251251- * implement atomic add functionality252252- */253253-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)254254-{255255- __asm__ __volatile__(256256-LOCK_PREFIX "addl %1,%0"257257- :"=m"(sem->count)258258- :"ir"(delta), "m"(sem->count));259259-}260260-261261-/*262262- * implement exchange and add functionality263263- */264264-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)265265-{266266- int tmp = delta;267267-268268- __asm__ __volatile__(269269-LOCK_PREFIX "xaddl %0,(%2)"270270- : "=r"(tmp), "=m"(sem->count)271271- : "r"(sem), "m"(sem->count), "0" (tmp)272272- : "memory");273273-274274- return tmp+delta;275275-}276276-277277-static inline int rwsem_is_locked(struct rw_semaphore *sem)278278-{279279- return (sem->count != 0);280280-}281281-282282-#endif /* __KERNEL__ */283283-#endif /* _X8664_RWSEM_H */
+2-1
include/asm-x86_64/smp.h
···4747extern void unlock_ipi_call_lock(void);4848extern int smp_num_siblings;4949extern void smp_send_reschedule(int cpu);5050-extern void zap_low_mappings(void);5150void smp_stop_cpu(void);5251extern int smp_call_function_single(int cpuid, void (*func) (void *info),5352 void *info, int retry, int wait);···8182extern int __cpu_disable(void);8283extern void __cpu_die(unsigned int cpu);8384extern void prefill_possible_map(void);8585+extern unsigned num_processors;8686+extern unsigned disabled_cpus;84878588#endif /* !ASSEMBLY */8689
···8484 return order; /* We could be slightly more clever with -1 here... */8585}86868787+static __inline__ int get_count_order(unsigned int count)8888+{8989+ int order;9090+9191+ order = fls(count) - 1;9292+ if (count & (count - 1))9393+ order++;9494+ return order;9595+}9696+8797/*8898 * hweightN: returns the hamming weight (i.e. the number8999 * of bits set) of a N-bit word
+11
include/linux/gfp.h
···1414/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */1515#define __GFP_DMA ((__force gfp_t)0x01u)1616#define __GFP_HIGHMEM ((__force gfp_t)0x02u)1717+#ifdef CONFIG_DMA_IS_DMA321818+#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */1919+#elif BITS_PER_LONG < 642020+#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */2121+#else2222+#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */2323+#endif17241825/*1926 * Action modifiers - doesn't change the zoning···6962 platforms, used as appropriate on others */70637164#define GFP_DMA __GFP_DMA6565+6666+/* 4GB DMA on some platforms */6767+#define GFP_DMA32 __GFP_DMA326868+72697370#define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))7471
+2-8
include/linux/mm.h
···206206struct mmu_gather;207207struct inode;208208209209-#ifdef ARCH_HAS_ATOMIC_UNSIGNED210210-typedef unsigned page_flags_t;211211-#else212212-typedef unsigned long page_flags_t;213213-#endif214214-215209/*216210 * Each physical page in the system has a struct page associated with217211 * it to keep track of whatever it is we are using the page for at the···213219 * a page.214220 */215221struct page {216216- page_flags_t flags; /* Atomic flags, some possibly222222+ unsigned long flags; /* Atomic flags, some possibly217223 * updated asynchronously */218224 atomic_t _count; /* Usage count, see below. */219225 atomic_t _mapcount; /* Count of ptes mapped in mms,···429435#endif430436431437/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */432432-#define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)438438+#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)433439#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)434440#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)435441
+12-8
include/linux/mmzone.h
···7171#endif72727373#define ZONE_DMA 07474-#define ZONE_NORMAL 17575-#define ZONE_HIGHMEM 27474+#define ZONE_DMA32 17575+#define ZONE_NORMAL 27676+#define ZONE_HIGHMEM 376777777-#define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */7878+#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */7879#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */79808081···109108110109/*111110 * On machines where it is needed (eg PCs) we divide physical memory112112- * into multiple physical zones. On a PC we have 3 zones:111111+ * into multiple physical zones. On a PC we have 4 zones:113112 *114113 * ZONE_DMA < 16 MB ISA DMA capable memory114114+ * ZONE_DMA32 0 MB Empty115115 * ZONE_NORMAL 16-896 MB direct mapped by the kernel116116 * ZONE_HIGHMEM > 896 MB only page cache and user processes117117 */···435433436434#include <linux/topology.h>437435/* Returns the number of the current Node. */436436+#ifndef numa_node_id438437#define numa_node_id() (cpu_to_node(raw_smp_processor_id()))438438+#endif439439440440#ifndef CONFIG_NEED_MULTIPLE_NODES441441···457453#include <asm/sparsemem.h>458454#endif459455460460-#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)456456+#if BITS_PER_LONG == 32461457/*462462- * with 32 bit page->flags field, we reserve 8 bits for node/zone info.463463- * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.458458+ * with 32 bit page->flags field, we reserve 9 bits for node/zone info.459459+ * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.464460 */465465-#define FLAGS_RESERVED 8461461+#define FLAGS_RESERVED 9466462467463#elif BITS_PER_LONG == 64468464/*
+1-1
mm/filemap.c
···134134 struct address_space *mapping;135135 struct page *page;136136137137- page = container_of((page_flags_t *)word, struct page, flags);137137+ page = container_of((unsigned long *)word, struct page, flags);138138139139 /*140140 * page_mapping() is being called without PG_locked held.
+14-6
mm/page_alloc.c
···6060 * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA6161 * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL6262 * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA6363+ *6464+ * TBD: should special case ZONE_DMA32 machines here - in those we normally6565+ * don't need any ZONE_NORMAL reservation6366 */6464-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };6767+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };65686669EXPORT_SYMBOL(totalram_pages);6770···7572struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;7673EXPORT_SYMBOL(zone_table);77747878-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };7575+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };7976int min_free_kbytes = 1024;80778178unsigned long __initdata nr_kernel_pages;···127124 printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",128125 function, current->comm, page);129126 printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",130130- (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,127127+ (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,131128 page->mapping, page_mapcount(page), page_count(page));132129 printk(KERN_EMERG "Backtrace:\n");133130 dump_stack();···14241421 zone = pgdat->node_zones + ZONE_NORMAL;14251422 if (zone->present_pages)14261423 zonelist->zones[j++] = zone;14241424+ case ZONE_DMA32:14251425+ zone = pgdat->node_zones + ZONE_DMA32;14261426+ if (zone->present_pages)14271427+ zonelist->zones[j++] = zone;14271428 case ZONE_DMA:14281429 zone = pgdat->node_zones + ZONE_DMA;14291430 if (zone->present_pages)···14421435 int res = ZONE_NORMAL;14431436 if (zone_bits & (__force int)__GFP_HIGHMEM)14441437 res = ZONE_HIGHMEM;14381438+ if (zone_bits & (__force int)__GFP_DMA32)14391439+ res = ZONE_DMA32;14451440 if (zone_bits & (__force int)__GFP_DMA)14461441 res = ZONE_DMA;14471442 return res;···18551846 if (process_zones(cpu))18561847 ret = NOTIFY_BAD;18571848 break;18581858-#ifdef CONFIG_HOTPLUG_CPU18491849+ case CPU_UP_CANCELED:18591850 case CPU_DEAD:18601851 free_zone_pagesets(cpu);18611852 break;18621862-#endif18631853 default:18641854 break;18651855 }···19631955 if (zholes_size)19641956 realsize -= zholes_size[j];1965195719661966- if (j == ZONE_DMA || j == ZONE_NORMAL)19581958+ if (j < ZONE_HIGHMEM)19671959 nr_kernel_pages += realsize;19681960 nr_all_pages += realsize;19691961