···11861186L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)11871187S: Maintained11881188F: arch/arm/mach-mvebu/11891189-F: drivers/rtc/armada38x-rtc11891189+F: drivers/rtc/rtc-armada38x.c1190119011911191ARM/Marvell Berlin SoC support11921192M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>···16751675F: include/linux/platform_data/at24.h1676167616771677ATA OVER ETHERNET (AOE) DRIVER16781678-M: "Ed L. Cashin" <ecashin@coraid.com>16791679-W: http://support.coraid.com/support/linux16781678+M: "Ed L. Cashin" <ed.cashin@acm.org>16791679+W: http://www.openaoe.org/16801680S: Supported16811681F: Documentation/aoe/16821682F: drivers/block/aoe/···32513251S: Maintained32523252F: Documentation/hwmon/dme173732533253F: drivers/hwmon/dme1737.c32543254+32553255+DMI/SMBIOS SUPPORT32563256+M: Jean Delvare <jdelvare@suse.de>32573257+S: Maintained32583258+F: drivers/firmware/dmi-id.c32593259+F: drivers/firmware/dmi_scan.c32603260+F: include/linux/dmi.h3254326132553262DOCKING STATION DRIVER32563263M: Shaohua Li <shaohua.li@intel.com>
+1-1
arch/arm/plat-omap/counter_32k.c
···103103104104 /*105105 * 120000 rough estimate from the calculations in106106- * __clocksource_updatefreq_scale.106106+ * __clocksource_update_freq_scale.107107 */108108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift,109109 32768, NSEC_PER_SEC, 120000);
+23-7
arch/arm64/include/asm/cmpxchg.h
···246246 __ret; \247247})248248249249-#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)250250-#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)251251-#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)252252-#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)249249+#define _protect_cmpxchg_local(pcp, o, n) \250250+({ \251251+ typeof(*raw_cpu_ptr(&(pcp))) __ret; \252252+ preempt_disable(); \253253+ __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \254254+ preempt_enable(); \255255+ __ret; \256256+})253257254254-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \255255- cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \256256- o1, o2, n1, n2)258258+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)259259+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)260260+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)261261+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)262262+263263+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \264264+({ \265265+ int __ret; \266266+ preempt_disable(); \267267+ __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \268268+ raw_cpu_ptr(&(ptr2)), \269269+ o1, o2, n1, n2); \270270+ preempt_enable(); \271271+ __ret; \272272+})257273258274#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))259275#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
+9
arch/arm64/include/asm/mmu_context.h
···151151{152152 unsigned int cpu = smp_processor_id();153153154154+ /*155155+ * init_mm.pgd does not contain any user mappings and it is always156156+ * active for kernel addresses in TTBR1. Just set the reserved TTBR0.157157+ */158158+ if (next == &init_mm) {159159+ cpu_set_reserved_ttbr0();160160+ return;161161+ }162162+154163 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)155164 check_and_switch_context(next, tsk);156165}
···11+/*22+ * Meta page table definitions.33+ */44+55+#ifndef _METAG_PGTABLE_BITS_H66+#define _METAG_PGTABLE_BITS_H77+88+#include <asm/metag_mem.h>99+1010+/*1111+ * Definitions for MMU descriptors1212+ *1313+ * These are the hardware bits in the MMCU pte entries.1414+ * Derived from the Meta toolkit headers.1515+ */1616+#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT1717+#define _PAGE_WRITE MMCU_ENTRY_WR_BIT1818+#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT1919+/* Write combine bit - this can cause writes to occur out of order */2020+#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT2121+/* Sys coherent bit - this bit is never used by Linux */2222+#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT2323+#define _PAGE_ALWAYS_ZERO_1 0x0202424+#define _PAGE_CACHE_CTRL0 0x0402525+#define _PAGE_CACHE_CTRL1 0x0802626+#define _PAGE_ALWAYS_ZERO_2 0x1002727+#define _PAGE_ALWAYS_ZERO_3 0x2002828+#define _PAGE_ALWAYS_ZERO_4 0x4002929+#define _PAGE_ALWAYS_ZERO_5 0x8003030+3131+/* These are software bits that we stuff into the gaps in the hardware3232+ * pte entries that are not used. Note, these DO get stored in the actual3333+ * hardware, but the hardware just does not use them.3434+ */3535+#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_13636+#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_23737+3838+/* Pages owned, and protected by, the kernel. */3939+#define _PAGE_KERNEL _PAGE_PRIV4040+4141+/* No cacheing of this page */4242+#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)4343+/* burst cacheing - good for data streaming */4444+#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)4545+/* One cache way per thread */4646+#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)4747+/* Full on cacheing */4848+#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)4949+5050+#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)5151+5252+/* which bits are used for cache control ... */5353+#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \5454+ _PAGE_WR_COMBINE)5555+5656+/* This is a mask of the bits that pte_modify is allowed to change. */5757+#define _PAGE_CHG_MASK (PAGE_MASK)5858+5959+#define _PAGE_SZ_SHIFT 16060+#define _PAGE_SZ_4K (0x0)6161+#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)6262+#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)6363+#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)6464+#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)6565+#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)6666+#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)6767+#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)6868+#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)6969+#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)7070+#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)7171+#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)7272+7373+#if defined(CONFIG_PAGE_SIZE_4K)7474+#define _PAGE_SZ (_PAGE_SZ_4K)7575+#elif defined(CONFIG_PAGE_SIZE_8K)7676+#define _PAGE_SZ (_PAGE_SZ_8K)7777+#elif defined(CONFIG_PAGE_SIZE_16K)7878+#define _PAGE_SZ (_PAGE_SZ_16K)7979+#endif8080+#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)8181+8282+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)8383+# define _PAGE_SZHUGE (_PAGE_SZ_8K)8484+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)8585+# define _PAGE_SZHUGE (_PAGE_SZ_16K)8686+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)8787+# define _PAGE_SZHUGE (_PAGE_SZ_32K)8888+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)8989+# define _PAGE_SZHUGE (_PAGE_SZ_64K)9090+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)9191+# define _PAGE_SZHUGE (_PAGE_SZ_128K)9292+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)9393+# define _PAGE_SZHUGE (_PAGE_SZ_256K)9494+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)9595+# define _PAGE_SZHUGE (_PAGE_SZ_512K)9696+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)9797+# define _PAGE_SZHUGE (_PAGE_SZ_1M)9898+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)9999+# define _PAGE_SZHUGE (_PAGE_SZ_2M)100100+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)101101+# define _PAGE_SZHUGE (_PAGE_SZ_4M)102102+#endif103103+104104+#endif /* _METAG_PGTABLE_BITS_H */
+1-94
arch/metag/include/asm/pgtable.h
···55#ifndef _METAG_PGTABLE_H66#define _METAG_PGTABLE_H7788+#include <asm/pgtable-bits.h>89#include <asm-generic/pgtable-nopmd.h>9101011/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */···1918#define CONSISTENT_END 0x773FFFFF2019#define VMALLOC_START 0x780000002120#define VMALLOC_END 0x7FFFFFFF2222-#endif2323-2424-/*2525- * Definitions for MMU descriptors2626- *2727- * These are the hardware bits in the MMCU pte entries.2828- * Derived from the Meta toolkit headers.2929- */3030-#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT3131-#define _PAGE_WRITE MMCU_ENTRY_WR_BIT3232-#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT3333-/* Write combine bit - this can cause writes to occur out of order */3434-#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT3535-/* Sys coherent bit - this bit is never used by Linux */3636-#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT3737-#define _PAGE_ALWAYS_ZERO_1 0x0203838-#define _PAGE_CACHE_CTRL0 0x0403939-#define _PAGE_CACHE_CTRL1 0x0804040-#define _PAGE_ALWAYS_ZERO_2 0x1004141-#define _PAGE_ALWAYS_ZERO_3 0x2004242-#define _PAGE_ALWAYS_ZERO_4 0x4004343-#define _PAGE_ALWAYS_ZERO_5 0x8004444-4545-/* These are software bits that we stuff into the gaps in the hardware4646- * pte entries that are not used. Note, these DO get stored in the actual4747- * hardware, but the hardware just does not use them.4848- */4949-#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_15050-#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_25151-5252-/* Pages owned, and protected by, the kernel. */5353-#define _PAGE_KERNEL _PAGE_PRIV5454-5555-/* No cacheing of this page */5656-#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)5757-/* burst cacheing - good for data streaming */5858-#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)5959-/* One cache way per thread */6060-#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)6161-/* Full on cacheing */6262-#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)6363-6464-#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)6565-6666-/* which bits are used for cache control ... */6767-#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \6868- _PAGE_WR_COMBINE)6969-7070-/* This is a mask of the bits that pte_modify is allowed to change. */7171-#define _PAGE_CHG_MASK (PAGE_MASK)7272-7373-#define _PAGE_SZ_SHIFT 17474-#define _PAGE_SZ_4K (0x0)7575-#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)7676-#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)7777-#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)7878-#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)7979-#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)8080-#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)8181-#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)8282-#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)8383-#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)8484-#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)8585-#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)8686-8787-#if defined(CONFIG_PAGE_SIZE_4K)8888-#define _PAGE_SZ (_PAGE_SZ_4K)8989-#elif defined(CONFIG_PAGE_SIZE_8K)9090-#define _PAGE_SZ (_PAGE_SZ_8K)9191-#elif defined(CONFIG_PAGE_SIZE_16K)9292-#define _PAGE_SZ (_PAGE_SZ_16K)9393-#endif9494-#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)9595-9696-#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)9797-# define _PAGE_SZHUGE (_PAGE_SZ_8K)9898-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)9999-# define _PAGE_SZHUGE (_PAGE_SZ_16K)100100-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)101101-# define _PAGE_SZHUGE (_PAGE_SZ_32K)102102-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)103103-# define _PAGE_SZHUGE (_PAGE_SZ_64K)104104-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)105105-# define _PAGE_SZHUGE (_PAGE_SZ_128K)106106-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)107107-# define _PAGE_SZHUGE (_PAGE_SZ_256K)108108-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)109109-# define _PAGE_SZHUGE (_PAGE_SZ_512K)110110-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)111111-# define _PAGE_SZHUGE (_PAGE_SZ_1M)112112-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)113113-# define _PAGE_SZHUGE (_PAGE_SZ_2M)114114-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)115115-# define _PAGE_SZHUGE (_PAGE_SZ_4M)11621#endif1172211823/*
···14081408 bne 9f /* continue in V mode if we are. */14091409141014105:14111411-#ifdef CONFIG_KVM_BOOK3S_64_HV14111411+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER14121412 /*14131413 * We are coming from kernel context. Check if we are coming from14141414 * guest. if yes, then we can continue. We will fall through
+12-2
arch/powerpc/platforms/powernv/smp.c
···3333#include <asm/runlatch.h>3434#include <asm/code-patching.h>3535#include <asm/dbell.h>3636+#include <asm/kvm_ppc.h>3737+#include <asm/ppc-opcode.h>36383739#include "powernv.h"3840···151149static void pnv_smp_cpu_kill_self(void)152150{153151 unsigned int cpu;154154- unsigned long srr1;152152+ unsigned long srr1, wmask;155153 u32 idle_states;156154157155 /* Standard hot unplug procedure */···162160 DBG("CPU%d offline\n", cpu);163161 generic_set_cpu_dead(cpu);164162 smp_wmb();163163+164164+ wmask = SRR1_WAKEMASK;165165+ if (cpu_has_feature(CPU_FTR_ARCH_207S))166166+ wmask = SRR1_WAKEMASK_P8;165167166168 idle_states = pnv_get_supported_cpuidle_states();167169 /* We don't want to take decrementer interrupts while we are offline,···197191 * having finished executing in a KVM guest, then srr1198192 * contains 0.199193 */200200- if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {194194+ if ((srr1 & wmask) == SRR1_WAKEEE) {201195 icp_native_flush_interrupt();202196 local_paca->irq_happened &= PACA_IRQ_HARD_DIS;203197 smp_mb();198198+ } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {199199+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);200200+ asm volatile(PPC_MSGCLR(%0) : : "r" (msg));201201+ kvmppc_set_host_ipi(cpu, 0);204202 }205203206204 if (cpu_core_split_required())
+23-21
arch/powerpc/platforms/pseries/mobility.c
···2525static struct kobject *mobility_kobj;26262727struct update_props_workarea {2828- u32 phandle;2929- u32 state;3030- u64 reserved;3131- u32 nprops;2828+ __be32 phandle;2929+ __be32 state;3030+ __be64 reserved;3131+ __be32 nprops;3232} __packed;33333434#define NODE_ACTION_MASK 0xff000000···5454 return rc;5555}56565757-static int delete_dt_node(u32 phandle)5757+static int delete_dt_node(__be32 phandle)5858{5959 struct device_node *dn;60606161- dn = of_find_node_by_phandle(phandle);6161+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));6262 if (!dn)6363 return -ENOENT;6464···127127 return 0;128128}129129130130-static int update_dt_node(u32 phandle, s32 scope)130130+static int update_dt_node(__be32 phandle, s32 scope)131131{132132 struct update_props_workarea *upwa;133133 struct device_node *dn;···136136 char *prop_data;137137 char *rtas_buf;138138 int update_properties_token;139139+ u32 nprops;139140 u32 vd;140141141142 update_properties_token = rtas_token("ibm,update-properties");···147146 if (!rtas_buf)148147 return -ENOMEM;149148150150- dn = of_find_node_by_phandle(phandle);149149+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));151150 if (!dn) {152151 kfree(rtas_buf);153152 return -ENOENT;···163162 break;164163165164 prop_data = rtas_buf + sizeof(*upwa);165165+ nprops = be32_to_cpu(upwa->nprops);166166167167 /* On the first call to ibm,update-properties for a node the168168 * the first property value descriptor contains an empty···172170 */173171 if (*prop_data == 0) {174172 prop_data++;175175- vd = *(u32 *)prop_data;173173+ vd = be32_to_cpu(*(__be32 *)prop_data);176174 prop_data += vd + sizeof(vd);177177- upwa->nprops--;175175+ nprops--;178176 }179177180180- for (i = 0; i < upwa->nprops; i++) {178178+ for (i = 0; i < nprops; i++) {181179 char *prop_name;182180183181 prop_name = prop_data;184182 prop_data += strlen(prop_name) + 1;185185- vd = *(u32 *)prop_data;183183+ vd = be32_to_cpu(*(__be32 *)prop_data);186184 prop_data += sizeof(vd);187185188186 switch (vd) {···214212 return 0;215213}216214217217-static int add_dt_node(u32 parent_phandle, u32 drc_index)215215+static int add_dt_node(__be32 parent_phandle, __be32 drc_index)218216{219217 struct device_node *dn;220218 struct device_node *parent_dn;221219 int rc;222220223223- parent_dn = of_find_node_by_phandle(parent_phandle);221221+ parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));224222 if (!parent_dn)225223 return -ENOENT;226224···239237int pseries_devicetree_update(s32 scope)240238{241239 char *rtas_buf;242242- u32 *data;240240+ __be32 *data;243241 int update_nodes_token;244242 int rc;245243···256254 if (rc && rc != 1)257255 break;258256259259- data = (u32 *)rtas_buf + 4;260260- while (*data & NODE_ACTION_MASK) {257257+ data = (__be32 *)rtas_buf + 4;258258+ while (be32_to_cpu(*data) & NODE_ACTION_MASK) {261259 int i;262262- u32 action = *data & NODE_ACTION_MASK;263263- int node_count = *data & NODE_COUNT_MASK;260260+ u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;261261+ u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;264262265263 data++;266264267265 for (i = 0; i < node_count; i++) {268268- u32 phandle = *data++;269269- u32 drc_index;266266+ __be32 phandle = *data++;267267+ __be32 drc_index;270268271269 switch (action) {272270 case DELETE_DT_NODE:
···57575858unsigned long ftrace_plt;59596060+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)6161+{6262+#ifdef CC_USING_HOTPATCH6363+ /* brcl 0,0 */6464+ insn->opc = 0xc004;6565+ insn->disp = 0;6666+#else6767+ /* stg r14,8(r15) */6868+ insn->opc = 0xe3e0;6969+ insn->disp = 0xf0080024;7070+#endif7171+}7272+7373+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)7474+{7575+#ifdef CONFIG_KPROBES7676+ if (insn->opc == BREAKPOINT_INSTRUCTION)7777+ return 1;7878+#endif7979+ return 0;8080+}8181+8282+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)8383+{8484+#ifdef CONFIG_KPROBES8585+ insn->opc = BREAKPOINT_INSTRUCTION;8686+ insn->disp = KPROBE_ON_FTRACE_NOP;8787+#endif8888+}8989+9090+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)9191+{9292+#ifdef CONFIG_KPROBES9393+ insn->opc = BREAKPOINT_INSTRUCTION;9494+ insn->disp = KPROBE_ON_FTRACE_CALL;9595+#endif9696+}9797+6098int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,6199 unsigned long addr)62100{···11072 return -EFAULT;11173 if (addr == MCOUNT_ADDR) {11274 /* Initial code replacement */113113-#ifdef CC_USING_HOTPATCH114114- /* We expect to see brcl 0,0 */115115- ftrace_generate_nop_insn(&orig);116116-#else117117- /* We expect to see stg r14,8(r15) */118118- orig.opc = 0xe3e0;119119- orig.disp = 0xf0080024;120120-#endif7575+ ftrace_generate_orig_insn(&orig);12176 ftrace_generate_nop_insn(&new);122122- } else if (old.opc == BREAKPOINT_INSTRUCTION) {7777+ } else if (is_kprobe_on_ftrace(&old)) {12378 /*12479 * If we find a breakpoint instruction, a kprobe has been12580 * placed at the beginning of the function. We write the···12089 * bytes of the original instruction so that the kprobes12190 * handler can execute a nop, if it reaches this breakpoint.12291 */123123- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;124124- orig.disp = KPROBE_ON_FTRACE_CALL;125125- new.disp = KPROBE_ON_FTRACE_NOP;9292+ ftrace_generate_kprobe_call_insn(&orig);9393+ ftrace_generate_kprobe_nop_insn(&new);12694 } else {12795 /* Replace ftrace call with a nop. */12896 ftrace_generate_call_insn(&orig, rec->ip);···141111142112 if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))143113 return -EFAULT;144144- if (old.opc == BREAKPOINT_INSTRUCTION) {114114+ if (is_kprobe_on_ftrace(&old)) {145115 /*146116 * If we find a breakpoint instruction, a kprobe has been147117 * placed at the beginning of the function. We write the···149119 * bytes of the original instruction so that the kprobes150120 * handler can execute a brasl if it reaches this breakpoint.151121 */152152- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;153153- orig.disp = KPROBE_ON_FTRACE_NOP;154154- new.disp = KPROBE_ON_FTRACE_CALL;122122+ ftrace_generate_kprobe_nop_insn(&orig);123123+ ftrace_generate_kprobe_call_insn(&new);155124 } else {156125 /* Replace nop with an ftrace call. */157126 ftrace_generate_nop_insn(&orig);
···1978197819791979 data = cyc2ns_read_begin();1980198019811981+ /*19821982+ * Internal timekeeping for enabled/running/stopped times19831983+ * is always in the local_clock domain.19841984+ */19811985 userpg->cap_user_time = 1;19821986 userpg->time_mult = data->cyc2ns_mul;19831987 userpg->time_shift = data->cyc2ns_shift;19841988 userpg->time_offset = data->cyc2ns_offset - now;1985198919861986- userpg->cap_user_time_zero = 1;19871987- userpg->time_zero = data->cyc2ns_offset;19901990+ /*19911991+ * cap_user_time_zero doesn't make sense when we're using a different19921992+ * time base for the records.19931993+ */19941994+ if (event->clock == &local_clock) {19951995+ userpg->cap_user_time_zero = 1;19961996+ userpg->time_zero = data->cyc2ns_offset;19971997+ }1988199819891999 cyc2ns_read_end(data);19902000}
···278278 /*279279 * We're out of tags on this hardware queue, kick any280280 * pending IO submits before going to sleep waiting for281281- * some to complete.281281+ * some to complete. Note that hctx can be NULL here for282282+ * reserved tag allocation.282283 */283283- blk_mq_run_hw_queue(hctx, false);284284+ if (hctx)285285+ blk_mq_run_hw_queue(hctx, false);284286285287 /*286288 * Retry tag allocation after running the hardware queue,
···210210211211 ret = em_sti_start(p, USER_CLOCKSOURCE);212212 if (!ret)213213- __clocksource_updatefreq_hz(cs, p->rate);213213+ __clocksource_update_freq_hz(cs, p->rate);214214 return ret;215215}216216
+1-1
drivers/clocksource/sh_cmt.c
···641641642642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);643643 if (!ret) {644644- __clocksource_updatefreq_hz(cs, ch->rate);644644+ __clocksource_update_freq_hz(cs, ch->rate);645645 ch->cs_enabled = true;646646 }647647 return ret;
+1-1
drivers/clocksource/sh_tmu.c
···272272273273 ret = sh_tmu_enable(ch);274274 if (!ret) {275275- __clocksource_updatefreq_hz(cs, ch->rate);275275+ __clocksource_update_freq_hz(cs, ch->rate);276276 ch->cs_enabled = true;277277 }278278
+1-12
drivers/gpu/drm/drm_crtc.c
···525525}526526EXPORT_SYMBOL(drm_framebuffer_reference);527527528528-static void drm_framebuffer_free_bug(struct kref *kref)529529-{530530- BUG();531531-}532532-533533-static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)534534-{535535- DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));536536- kref_put(&fb->refcount, drm_framebuffer_free_bug);537537-}538538-539528/**540529 * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr541530 * @fb: fb to unregister···13091320 return;13101321 }13111322 /* disconnect the plane from the fb and crtc: */13121312- __drm_framebuffer_unreference(plane->old_fb);13231323+ drm_framebuffer_unreference(plane->old_fb);13131324 plane->old_fb = NULL;13141325 plane->fb = NULL;13151326 plane->crtc = NULL;
+21-17
drivers/gpu/drm/i915/i915_gem.c
···2737273727382738 WARN_ON(i915_verify_lists(ring->dev));2739273927402740- /* Move any buffers on the active list that are no longer referenced27412741- * by the ringbuffer to the flushing/inactive lists as appropriate,27422742- * before we free the context associated with the requests.27402740+ /* Retire requests first as we use it above for the early return.27412741+ * If we retire requests last, we may use a later seqno and so clear27422742+ * the requests lists without clearing the active list, leading to27432743+ * confusion.27432744 */27442744- while (!list_empty(&ring->active_list)) {27452745- struct drm_i915_gem_object *obj;27462746-27472747- obj = list_first_entry(&ring->active_list,27482748- struct drm_i915_gem_object,27492749- ring_list);27502750-27512751- if (!i915_gem_request_completed(obj->last_read_req, true))27522752- break;27532753-27542754- i915_gem_object_move_to_inactive(obj);27552755- }27562756-27572757-27582745 while (!list_empty(&ring->request_list)) {27592746 struct drm_i915_gem_request *request;27602747 struct intel_ringbuffer *ringbuf;···27742787 ringbuf->last_retired_head = request->postfix;2775278827762789 i915_gem_free_request(request);27902790+ }27912791+27922792+ /* Move any buffers on the active list that are no longer referenced27932793+ * by the ringbuffer to the flushing/inactive lists as appropriate,27942794+ * before we free the context associated with the requests.27952795+ */27962796+ while (!list_empty(&ring->active_list)) {27972797+ struct drm_i915_gem_object *obj;27982798+27992799+ obj = list_first_entry(&ring->active_list,28002800+ struct drm_i915_gem_object,28012801+ ring_list);28022802+28032803+ if (!i915_gem_request_completed(obj->last_read_req, true))28042804+ break;28052805+28062806+ i915_gem_object_move_to_inactive(obj);27772807 }2778280827792809 if (unlikely(ring->trace_irq_req &&
···739739 for (id = kempld_dmi_table;740740 id->matches[0].slot != DMI_NONE; id++)741741 if (strstr(id->ident, force_device_id))742742- if (id->callback && id->callback(id))742742+ if (id->callback && !id->callback(id))743743 break;744744 if (id->matches[0].slot == DMI_NONE)745745 return -ENODEV;
+24-6
drivers/mfd/rtsx_usb.c
···196196int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)197197{198198 u16 value;199199+ u8 *buf;200200+ int ret;199201200202 if (!data)201203 return -EINVAL;202202- *data = 0;204204+205205+ buf = kzalloc(sizeof(u8), GFP_KERNEL);206206+ if (!buf)207207+ return -ENOMEM;203208204209 addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT;205210 value = swab16(addr);206211207207- return usb_control_msg(ucr->pusb_dev,212212+ ret = usb_control_msg(ucr->pusb_dev,208213 usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,209214 USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,210210- value, 0, data, 1, 100);215215+ value, 0, buf, 1, 100);216216+ *data = *buf;217217+218218+ kfree(buf);219219+ return ret;211220}212221EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register);213222···297288int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status)298289{299290 int ret;291291+ u16 *buf;300292301293 if (!status)302294 return -EINVAL;303295304304- if (polling_pipe == 0)296296+ if (polling_pipe == 0) {297297+ buf = kzalloc(sizeof(u16), GFP_KERNEL);298298+ if (!buf)299299+ return -ENOMEM;300300+305301 ret = usb_control_msg(ucr->pusb_dev,306302 usb_rcvctrlpipe(ucr->pusb_dev, 0),307303 RTSX_USB_REQ_POLL,308304 USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,309309- 0, 0, status, 2, 100);310310- else305305+ 0, 0, buf, 2, 100);306306+ *status = *buf;307307+308308+ kfree(buf);309309+ } else {311310 ret = rtsx_usb_get_status_with_bulk(ucr, status);311311+ }312312313313 /* usb_control_msg may return positive when success */314314 if (ret < 0)
+29-2
drivers/net/ethernet/amd/pcnet32.c
···15431543{15441544 struct pcnet32_private *lp;15451545 int i, media;15461546- int fdx, mii, fset, dxsuflo;15461546+ int fdx, mii, fset, dxsuflo, sram;15471547 int chip_version;15481548 char *chipname;15491549 struct net_device *dev;···15801580 }1581158115821582 /* initialize variables */15831583- fdx = mii = fset = dxsuflo = 0;15831583+ fdx = mii = fset = dxsuflo = sram = 0;15841584 chip_version = (chip_version >> 12) & 0xffff;1585158515861586 switch (chip_version) {···16131613 chipname = "PCnet/FAST III 79C973"; /* PCI */16141614 fdx = 1;16151615 mii = 1;16161616+ sram = 1;16161617 break;16171618 case 0x2626:16181619 chipname = "PCnet/Home 79C978"; /* PCI */···16371636 chipname = "PCnet/FAST III 79C975"; /* PCI */16381637 fdx = 1;16391638 mii = 1;16391639+ sram = 1;16401640 break;16411641 case 0x2628:16421642 chipname = "PCnet/PRO 79C976";···16641662 a->write_csr(ioaddr, 80,16651663 (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00);16661664 dxsuflo = 1;16651665+ }16661666+16671667+ /*16681668+ * The Am79C973/Am79C975 controllers come with 12K of SRAM16691669+ * which we can use for the Tx/Rx buffers but most importantly,16701670+ * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid16711671+ * Tx fifo underflows.16721672+ */16731673+ if (sram) {16741674+ /*16751675+ * The SRAM is being configured in two steps. First we16761676+ * set the SRAM size in the BCR25:SRAM_SIZE bits. According16771677+ * to the datasheet, each bit corresponds to a 512-byte16781678+ * page so we can have at most 24 pages. The SRAM_SIZE16791679+ * holds the value of the upper 8 bits of the 16-bit SRAM size.16801680+ * The low 8-bits start at 0x00 and end at 0xff. So the16811681+ * address range is from 0x0000 up to 0x17ff. Therefore,16821682+ * the SRAM_SIZE is set to 0x17. The next step is to set16831683+ * the BCR26:SRAM_BND midway through so the Tx and Rx16841684+ * buffers can share the SRAM equally.16851685+ */16861686+ a->write_bcr(ioaddr, 25, 0x17);16871687+ a->write_bcr(ioaddr, 26, 0xc);16881688+ /* And finally enable the NOUFLO bit */16891689+ a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11));16671690 }1668169116691692 dev = alloc_etherdev(sizeof(*lp));
+2
drivers/net/ethernet/emulex/benet/be.h
···354354 u16 vlan_tag;355355 u32 tx_rate;356356 u32 plink_tracking;357357+ u32 privileges;357358};358359359360enum vf_state {···424423425424 u8 __iomem *csr; /* CSR BAR used only for BE2/3 */426425 u8 __iomem *db; /* Door Bell */426426+ u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */427427428428 struct mutex mbox_lock; /* For serializing mbox cmds to BE card */429429 struct be_dma_mem mbox_mem;
+7-10
drivers/net/ethernet/emulex/benet/be_cmds.c
···19021902{19031903 int num_eqs, i = 0;1904190419051905- if (lancer_chip(adapter) && num > 8) {19061906- while (num) {19071907- num_eqs = min(num, 8);19081908- __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);19091909- i += num_eqs;19101910- num -= num_eqs;19111911- }19121912- } else {19131913- __be_cmd_modify_eqd(adapter, set_eqd, num);19051905+ while (num) {19061906+ num_eqs = min(num, 8);19071907+ __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);19081908+ i += num_eqs;19091909+ num -= num_eqs;19141910 }1915191119161912 return 0;···1914191819151919/* Uses sycnhronous mcc */19161920int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,19171917- u32 num)19211921+ u32 num, u32 domain)19181922{19191923 struct be_mcc_wrb *wrb;19201924 struct be_cmd_req_vlan_config *req;···19321936 be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,19331937 OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req),19341938 wrb, NULL);19391939+ req->hdr.domain = domain;1935194019361941 req->interface_id = if_id;19371942 req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;
···699699 boff = tmp % bsize;700700 if (boff) {701701 bh = affs_bread_ino(inode, bidx, 0);702702- if (IS_ERR(bh))703703- return PTR_ERR(bh);702702+ if (IS_ERR(bh)) {703703+ written = PTR_ERR(bh);704704+ goto err_first_bh;705705+ }704706 tmp = min(bsize - boff, to - from);705707 BUG_ON(boff + tmp > bsize || tmp > bsize);706708 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);···714712 bidx++;715713 } else if (bidx) {716714 bh = affs_bread_ino(inode, bidx - 1, 0);717717- if (IS_ERR(bh))718718- return PTR_ERR(bh);715715+ if (IS_ERR(bh)) {716716+ written = PTR_ERR(bh);717717+ goto err_first_bh;718718+ }719719 }720720 while (from + bsize <= to) {721721 prev_bh = bh;722722 bh = affs_getemptyblk_ino(inode, bidx);723723 if (IS_ERR(bh))724724- goto out;724724+ goto err_bh;725725 memcpy(AFFS_DATA(bh), data + from, bsize);726726 if (buffer_new(bh)) {727727 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);···755751 prev_bh = bh;756752 bh = affs_bread_ino(inode, bidx, 1);757753 if (IS_ERR(bh))758758- goto out;754754+ goto err_bh;759755 tmp = min(bsize, to - from);760756 BUG_ON(tmp > bsize);761757 memcpy(AFFS_DATA(bh), data + from, tmp);···794790 if (tmp > inode->i_size)795791 inode->i_size = AFFS_I(inode)->mmu_private = tmp;796792793793+err_first_bh:797794 unlock_page(page);798795 page_cache_release(page);799796800797 return written;801798802802-out:799799+err_bh:803800 bh = prev_bh;804801 if (!written)805802 written = PTR_ERR(bh);
+11-9
fs/hfsplus/brec.c
···131131 hfs_bnode_write(node, entry, data_off + key_len, entry_len);132132 hfs_bnode_dump(node);133133134134- if (new_node) {135135- /* update parent key if we inserted a key136136- * at the start of the first node137137- */138138- if (!rec && new_node != node)139139- hfs_brec_update_parent(fd);134134+ /*135135+ * update parent key if we inserted a key136136+ * at the start of the node and it is not the new node137137+ */138138+ if (!rec && new_node != node) {139139+ hfs_bnode_read_key(node, fd->search_key, data_off + size);140140+ hfs_brec_update_parent(fd);141141+ }140142143143+ if (new_node) {141144 hfs_bnode_put(fd->bnode);142145 if (!new_node->parent) {143146 hfs_btree_inc_height(tree);···170167 }171168 goto again;172169 }173173-174174- if (!rec)175175- hfs_brec_update_parent(fd);176170177171 return 0;178172}···370370 if (IS_ERR(parent))371371 return PTR_ERR(parent);372372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);373373+ if (fd->record < 0)374374+ return -ENOENT;373375 hfs_bnode_dump(parent);374376 rec = fd->record;375377
+19-2
include/linux/clockchips.h
···3939 CLOCK_EVT_MODE_PERIODIC,4040 CLOCK_EVT_MODE_ONESHOT,4141 CLOCK_EVT_MODE_RESUME,4242+4343+ /* Legacy ->set_mode() callback doesn't support below modes */4244};43454446/*···8381 * @mode: operating mode assigned by the management code8482 * @features: features8583 * @retries: number of forced programming retries8686- * @set_mode: set mode function8484+ * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.8585+ * @set_mode_periodic: switch mode to periodic, if !set_mode8686+ * @set_mode_oneshot: switch mode to oneshot, if !set_mode8787+ * @set_mode_shutdown: switch mode to shutdown, if !set_mode8888+ * @set_mode_resume: resume clkevt device, if !set_mode8789 * @broadcast: function to broadcast events8890 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration8991 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration···114108 unsigned int features;115109 unsigned long retries;116110117117- void (*broadcast)(const struct cpumask *mask);111111+ /*112112+ * Mode transition callback(s): Only one of the two groups should be113113+ * defined:114114+ * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.115115+ * - set_mode_{shutdown|periodic|oneshot|resume}().116116+ */118117 void (*set_mode)(enum clock_event_mode mode,119118 struct clock_event_device *);119119+ int (*set_mode_periodic)(struct clock_event_device *);120120+ int (*set_mode_oneshot)(struct clock_event_device *);121121+ int (*set_mode_shutdown)(struct clock_event_device *);122122+ int (*set_mode_resume)(struct clock_event_device *);123123+124124+ void (*broadcast)(const struct cpumask *mask);120125 void (*suspend)(struct clock_event_device *);121126 void (*resume)(struct clock_event_device *);122127 unsigned long min_delta_ticks;
+17-8
include/linux/clocksource.h
···5656 * @shift: cycle to nanosecond divisor (power of two)5757 * @max_idle_ns: max idle time permitted by the clocksource (nsecs)5858 * @maxadj: maximum adjustment value to mult (~11%)5959+ * @max_cycles: maximum safe cycle value which won't overflow on multiplication5960 * @flags: flags describing special properties6061 * @archdata: arch-specific data6162 * @suspend: suspend function for the clocksource, if necessary···7776#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA7877 struct arch_clocksource_data archdata;7978#endif8080-7979+ u64 max_cycles;8180 const char *name;8281 struct list_head list;8382 int rating;···179178}180179181180182182-extern int clocksource_register(struct clocksource*);183181extern int clocksource_unregister(struct clocksource*);184182extern void clocksource_touch_watchdog(void);185183extern struct clocksource* clocksource_get_next(void);···189189extern void clocksource_mark_unstable(struct clocksource *cs);190190191191extern u64192192-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);192192+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);193193extern void194194clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);195195···200200extern int201201__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);202202extern void203203-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);203203+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);204204+205205+/*206206+ * Don't call this unless you are a default clocksource207207+ * (AKA: jiffies) and absolutely have to.208208+ */209209+static inline int __clocksource_register(struct clocksource *cs)210210+{211211+ return __clocksource_register_scale(cs, 1, 0);212212+}204213205214static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)206215{···221212 return __clocksource_register_scale(cs, 1000, khz);222213}223214224224-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)215215+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)225216{226226- __clocksource_updatefreq_scale(cs, 1, hz);217217+ __clocksource_update_freq_scale(cs, 1, hz);227218}228219229229-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)220220+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)230221{231231- __clocksource_updatefreq_scale(cs, 1000, khz);222222+ __clocksource_update_freq_scale(cs, 1000, khz);232223}233224234225
+1
include/linux/libata.h
···232232 * led */233233 ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */234234 ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */235235+ ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */235236236237 /* bits 24:31 of ap->flags are reserved for LLD specific flags */237238
+3
include/linux/mfd/palmas.h
···29992999#define PALMAS_GPADC_TRIM15 0x0E30003000#define PALMAS_GPADC_TRIM16 0x0F3001300130023002+/* TPS659038 regen2_ctrl offset iss different from palmas */30033003+#define TPS659038_REGEN2_CTRL 0x1230043004+30023005/* TPS65917 Interrupt registers */3003300630043007/* Registers for function INTERRUPT */
···316316 * @driver_data: private regulator data317317 * @of_node: OpenFirmware node to parse for device tree bindings (may be318318 * NULL).319319- * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is319319+ * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is320320 * insufficient.321321 * @ena_gpio_initialized: GPIO controlling regulator enable was properly322322 * initialized, meaning that >= 0 is a valid gpio
+5-4
include/linux/sched.h
···1625162516261626 /*16271627 * numa_faults_locality tracks if faults recorded during the last16281628- * scan window were remote/local. The task scan period is adapted16291629- * based on the locality of the faults with different weights16301630- * depending on whether they were shared or private faults16281628+ * scan window were remote/local or failed to migrate. The task scan16291629+ * period is adapted based on the locality of the faults with different16301630+ * weights depending on whether they were shared or private faults16311631 */16321632- unsigned long numa_faults_locality[2];16321632+ unsigned long numa_faults_locality[3];1633163316341634 unsigned long numa_pages_migrated;16351635#endif /* CONFIG_NUMA_BALANCING */···17191719#define TNF_NO_GROUP 0x0217201720#define TNF_SHARED 0x0417211721#define TNF_FAULT_LOCAL 0x0817221722+#define TNF_MIGRATE_FAIL 0x101722172317231724#ifdef CONFIG_NUMA_BALANCING17241725extern void task_numa_fault(int last_node, int node, int pages, int flags);
+8-8
include/linux/timekeeper_internal.h
···1616 * @read: Read function of @clock1717 * @mask: Bitmask for two's complement subtraction of non 64bit clocks1818 * @cycle_last: @clock cycle value at last update1919- * @mult: NTP adjusted multiplier for scaled math conversion1919+ * @mult: (NTP adjusted) multiplier for scaled math conversion2020 * @shift: Shift value for scaled math conversion2121 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout2222- * @base_mono: ktime_t (nanoseconds) base time for readout2222+ * @base: ktime_t (nanoseconds) base time for readout2323 *2424 * This struct has size 56 byte on 64 bit. Together with a seqcount it2525 * occupies a single 64byte cache line.2626 *2727 * The struct is separate from struct timekeeper as it is also used2828- * for a fast NMI safe accessor to clock monotonic.2828+ * for a fast NMI safe accessors.2929 */3030struct tk_read_base {3131 struct clocksource *clock;···3535 u32 mult;3636 u32 shift;3737 u64 xtime_nsec;3838- ktime_t base_mono;3838+ ktime_t base;3939};40404141/**4242 * struct timekeeper - Structure holding internal timekeeping values.4343- * @tkr: The readout base structure4343+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC4444+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW4445 * @xtime_sec: Current CLOCK_REALTIME time in seconds4546 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds4647 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset···4948 * @offs_boot: Offset clock monotonic -> clock boottime5049 * @offs_tai: Offset clock monotonic -> clock tai5150 * @tai_offset: The current UTC to TAI offset in seconds5252- * @base_raw: Monotonic raw base time in ktime_t format5351 * @raw_time: Monotonic raw base time in timespec64 format5452 * @cycle_interval: Number of clock cycles in one NTP interval5553 * @xtime_interval: Number of clock shifted nano seconds in one NTP···7676 * used instead.7777 */7878struct timekeeper {7979- struct tk_read_base tkr;7979+ struct tk_read_base tkr_mono;8080+ struct tk_read_base tkr_raw;8081 u64 xtime_sec;8182 unsigned long ktime_sec;8283 struct timespec64 wall_to_monotonic;···8584 ktime_t offs_boot;8685 ktime_t offs_tai;8786 s32 tai_offset;8888- ktime_t base_raw;8987 struct timespec64 raw_time;90889189 /* The following members are for timekeeping internal use */
···326326 exclude_callchain_user : 1, /* exclude user callchains */327327 mmap2 : 1, /* include mmap with inode data */328328 comm_exec : 1, /* flag comm events that are due to an exec */329329- __reserved_1 : 39;329329+ use_clockid : 1, /* use @clockid for time fields */330330+ __reserved_1 : 38;330331331332 union {332333 __u32 wakeup_events; /* wakeup every n events */···356355 */357356 __u32 sample_stack_user;358357359359- /* Align to u64. */360360- __u32 __reserved_2;358358+ __s32 clockid;361359 /*362360 * Defines set of regs to dump for each sample363361 * state captured on:
+74-3
kernel/events/core.c
···327327 return local_clock();328328}329329330330+static inline u64 perf_event_clock(struct perf_event *event)331331+{332332+ return event->clock();333333+}334334+330335static inline struct perf_cpu_context *331336__get_cpu_context(struct perf_event_context *ctx)332337{···47674762 }4768476347694764 if (sample_type & PERF_SAMPLE_TIME)47704770- data->time = perf_clock();47654765+ data->time = perf_event_clock(event);4771476647724767 if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))47734768 data->id = primary_event_id(event);···53455340 task_event->event_id.tid = perf_event_tid(event, task);53465341 task_event->event_id.ptid = perf_event_tid(event, current);5347534253435343+ task_event->event_id.time = perf_event_clock(event);53445344+53485345 perf_output_put(&handle, task_event->event_id);5349534653505347 perf_event__output_id_sample(event, &handle, &sample);···53805373 /* .ppid */53815374 /* .tid */53825375 /* .ptid */53835383- .time = perf_clock(),53765376+ /* .time */53845377 },53855378 };53865379···57565749 .misc = 0,57575750 .size = sizeof(throttle_event),57585751 },57595759- .time = perf_clock(),57525752+ .time = perf_event_clock(event),57605753 .id = primary_event_id(event),57615754 .stream_id = event->id,57625755 };···63006293static struct pmu perf_swevent = {63016294 .task_ctx_nr = perf_sw_context,6302629562966296+ .capabilities = PERF_PMU_CAP_NO_NMI,62976297+63036298 .event_init = perf_swevent_init,63046299 .add = perf_swevent_add,63056300 .del = perf_swevent_del,···66456636static struct pmu perf_cpu_clock = {66466637 .task_ctx_nr = perf_sw_context,6647663866396639+ .capabilities = PERF_PMU_CAP_NO_NMI,66406640+66486641 .event_init = cpu_clock_event_init,66496642 .add = cpu_clock_event_add,66506643 .del = cpu_clock_event_del,···6725671467266715static struct pmu perf_task_clock = {67276716 .task_ctx_nr = perf_sw_context,67176717+67186718+ .capabilities = PERF_PMU_CAP_NO_NMI,6728671967296720 .event_init = task_clock_event_init,67306721 .add = task_clock_event_add,···72137200 event->hw.target = task;72147201 }7215720272037203+ event->clock = &local_clock;72047204+ if (parent_event)72057205+ event->clock = parent_event->clock;72067206+72167207 if (!overflow_handler && parent_event) {72177208 overflow_handler = parent_event->overflow_handler;72187209 context = parent_event->overflow_handler_context;···74397422 if (output_event->cpu == -1 && output_event->ctx != event->ctx)74407423 goto out;7441742474257425+ /*74267426+ * Mixing clocks in the same buffer is trouble you don't need.74277427+ */74287428+ if (output_event->clock != event->clock)74297429+ goto out;74307430+74427431set:74437432 mutex_lock(&event->mmap_mutex);74447433 /* Can't redirect output if we've got an active mmap() */···7475745274767453 mutex_lock(a);74777454 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);74557455+}74567456+74577457+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)74587458+{74597459+ bool nmi_safe = false;74607460+74617461+ switch (clk_id) {74627462+ case CLOCK_MONOTONIC:74637463+ event->clock = &ktime_get_mono_fast_ns;74647464+ nmi_safe = true;74657465+ break;74667466+74677467+ case CLOCK_MONOTONIC_RAW:74687468+ event->clock = &ktime_get_raw_fast_ns;74697469+ nmi_safe = true;74707470+ break;74717471+74727472+ case CLOCK_REALTIME:74737473+ event->clock = &ktime_get_real_ns;74747474+ break;74757475+74767476+ case CLOCK_BOOTTIME:74777477+ event->clock = &ktime_get_boot_ns;74787478+ break;74797479+74807480+ case CLOCK_TAI:74817481+ event->clock = &ktime_get_tai_ns;74827482+ break;74837483+74847484+ default:74857485+ return -EINVAL;74867486+ }74877487+74887488+ if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))74897489+ return -EINVAL;74907490+74917491+ return 0;74787492}7479749374807494/**···76297569 */76307570 pmu = event->pmu;7631757175727572+ if (attr.use_clockid) {75737573+ err = perf_event_set_clock(event, attr.clockid);75747574+ if (err)75757575+ goto err_alloc;75767576+ }75777577+76327578 if (group_leader &&76337579 (is_software_event(event) != is_software_event(group_leader))) {76347580 if (is_software_event(event)) {···76847618 */76857619 if (group_leader->group_leader != group_leader)76867620 goto err_context;76217621+76227622+ /* All events in a group should have the same clock */76237623+ if (group_leader->clock != event->clock)76247624+ goto err_context;76257625+76877626 /*76887627 * Do not allow to attach to a group in a different76897628 * task or CPU context:
+6-2
kernel/sched/fair.c
···16091609 /*16101610 * If there were no record hinting faults then either the task is16111611 * completely idle or all activity is areas that are not of interest16121612- * to automatic numa balancing. Scan slower16121612+ * to automatic numa balancing. Related to that, if there were failed16131613+ * migration then it implies we are migrating too quickly or the local16141614+ * node is overloaded. In either case, scan slower16131615 */16141614- if (local + shared == 0) {16161616+ if (local + shared == 0 || p->numa_faults_locality[2]) {16151617 p->numa_scan_period = min(p->numa_scan_period_max,16161618 p->numa_scan_period << 1);16171619···2082208020832081 if (migrated)20842082 p->numa_pages_migrated += pages;20832083+ if (flags & TNF_MIGRATE_FAIL)20842084+ p->numa_faults_locality[2] += pages;2085208520862086 p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;20872087 p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
+86-2
kernel/time/clockevents.c
···9494}9595EXPORT_SYMBOL_GPL(clockevent_delta2ns);96969797+static int __clockevents_set_mode(struct clock_event_device *dev,9898+ enum clock_event_mode mode)9999+{100100+ /* Transition with legacy set_mode() callback */101101+ if (dev->set_mode) {102102+ /* Legacy callback doesn't support new modes */103103+ if (mode > CLOCK_EVT_MODE_RESUME)104104+ return -ENOSYS;105105+ dev->set_mode(mode, dev);106106+ return 0;107107+ }108108+109109+ if (dev->features & CLOCK_EVT_FEAT_DUMMY)110110+ return 0;111111+112112+ /* Transition with new mode-specific callbacks */113113+ switch (mode) {114114+ case CLOCK_EVT_MODE_UNUSED:115115+ /*116116+ * This is an internal state, which is guaranteed to go from117117+ * SHUTDOWN to UNUSED. No driver interaction required.118118+ */119119+ return 0;120120+121121+ case CLOCK_EVT_MODE_SHUTDOWN:122122+ return dev->set_mode_shutdown(dev);123123+124124+ case CLOCK_EVT_MODE_PERIODIC:125125+ /* Core internal bug */126126+ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))127127+ return -ENOSYS;128128+ return dev->set_mode_periodic(dev);129129+130130+ case CLOCK_EVT_MODE_ONESHOT:131131+ /* Core internal bug */132132+ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))133133+ return -ENOSYS;134134+ return dev->set_mode_oneshot(dev);135135+136136+ case CLOCK_EVT_MODE_RESUME:137137+ /* Optional callback */138138+ if (dev->set_mode_resume)139139+ return dev->set_mode_resume(dev);140140+ else141141+ return 0;142142+143143+ default:144144+ return -ENOSYS;145145+ }146146+}147147+97148/**98149 * clockevents_set_mode - set the operating mode of a clock event device99150 * @dev: device to modify···156105 enum clock_event_mode mode)157106{158107 if (dev->mode != mode) {159159- dev->set_mode(mode, dev);108108+ if (__clockevents_set_mode(dev, mode))109109+ return;110110+160111 dev->mode = mode;161112162113 /*···426373}427374EXPORT_SYMBOL_GPL(clockevents_unbind);428375376376+/* Sanity check of mode transition callbacks */377377+static int clockevents_sanity_check(struct clock_event_device *dev)378378+{379379+ /* Legacy set_mode() callback */380380+ if (dev->set_mode) {381381+ /* We shouldn't be supporting new modes now */382382+ WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||383383+ dev->set_mode_shutdown || dev->set_mode_resume);384384+ return 0;385385+ }386386+387387+ if (dev->features & CLOCK_EVT_FEAT_DUMMY)388388+ return 0;389389+390390+ /* New mode-specific callbacks */391391+ if (!dev->set_mode_shutdown)392392+ return -EINVAL;393393+394394+ if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&395395+ !dev->set_mode_periodic)396396+ return -EINVAL;397397+398398+ if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&399399+ !dev->set_mode_oneshot)400400+ return -EINVAL;401401+402402+ return 0;403403+}404404+429405/**430406 * clockevents_register_device - register a clock event device431407 * @dev: device to register···464382 unsigned long flags;465383466384 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);385385+ BUG_ON(clockevents_sanity_check(dev));386386+467387 if (!dev->cpumask) {468388 WARN_ON(num_possible_cpus() > 1);469389 dev->cpumask = cpumask_of(smp_processor_id());···533449 return clockevents_program_event(dev, dev->next_event, false);534450535451 if (dev->mode == CLOCK_EVT_MODE_PERIODIC)536536- dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);452452+ return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);537453538454 return 0;539455}
+76-92
kernel/time/clocksource.c
···142142 schedule_work(&watchdog_work);143143}144144145145-static void clocksource_unstable(struct clocksource *cs, int64_t delta)146146-{147147- printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",148148- cs->name, delta);149149- __clocksource_unstable(cs);150150-}151151-152145/**153146 * clocksource_mark_unstable - mark clocksource unstable via watchdog154147 * @cs: clocksource to be marked unstable···167174static void clocksource_watchdog(unsigned long data)168175{169176 struct clocksource *cs;170170- cycle_t csnow, wdnow, delta;177177+ cycle_t csnow, wdnow, cslast, wdlast, delta;171178 int64_t wd_nsec, cs_nsec;172179 int next_cpu, reset_pending;173180···206213207214 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);208215 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);216216+ wdlast = cs->wd_last; /* save these in case we print them */217217+ cslast = cs->cs_last;209218 cs->cs_last = csnow;210219 cs->wd_last = wdnow;211220···216221217222 /* Check the deviation from the watchdog clocksource. */218223 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {219219- clocksource_unstable(cs, cs_nsec - wd_nsec);224224+ pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);225225+ pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",226226+ watchdog->name, wdnow, wdlast, watchdog->mask);227227+ pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",228228+ cs->name, csnow, cslast, cs->mask);229229+ __clocksource_unstable(cs);220230 continue;221231 }222232···469469 * @shift: cycle to nanosecond divisor (power of two)470470 * @maxadj: maximum adjustment value to mult (~11%)471471 * @mask: bitmask for two's complement subtraction of non 64 bit counters472472+ * @max_cyc: maximum cycle value before potential overflow (does not include473473+ * any safety margin)474474+ *475475+ * NOTE: This function includes a safety margin of 50%, so that bad clock values476476+ * can be detected.472477 */473473-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)478478+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)474479{475480 u64 max_nsecs, max_cycles;476481477482 /*478483 * Calculate the maximum number of cycles that we can pass to the479479- * cyc2ns function without overflowing a 64-bit signed result. The480480- * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)481481- * which is equivalent to the below.482482- * max_cycles < (2^63)/(mult + maxadj)483483- * max_cycles < 2^(log2((2^63)/(mult + maxadj)))484484- * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))485485- * max_cycles < 2^(63 - log2(mult + maxadj))486486- * max_cycles < 1 << (63 - log2(mult + maxadj))487487- * Please note that we add 1 to the result of the log2 to account for488488- * any rounding errors, ensure the above inequality is satisfied and489489- * no overflow will occur.484484+ * cyc2ns() function without overflowing a 64-bit result.490485 */491491- max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));486486+ max_cycles = ULLONG_MAX;487487+ do_div(max_cycles, mult+maxadj);492488493489 /*494490 * The actual maximum number of cycles we can defer the clocksource is···495499 max_cycles = min(max_cycles, mask);496500 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);497501502502+ /* return the max_cycles value as well if requested */503503+ if (max_cyc)504504+ *max_cyc = max_cycles;505505+506506+ /* Return 50% of the actual maximum, so we can detect bad values */507507+ max_nsecs >>= 1;508508+498509 return max_nsecs;499510}500511501512/**502502- * clocksource_max_deferment - Returns max time the clocksource can be deferred503503- * @cs: Pointer to clocksource513513+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles514514+ * @cs: Pointer to clocksource to be updated504515 *505516 */506506-static u64 clocksource_max_deferment(struct clocksource *cs)517517+static inline void clocksource_update_max_deferment(struct clocksource *cs)507518{508508- u64 max_nsecs;509509-510510- max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,511511- cs->mask);512512- /*513513- * To ensure that the clocksource does not wrap whilst we are idle,514514- * limit the time the clocksource can be deferred by 12.5%. Please515515- * note a margin of 12.5% is used because this can be computed with516516- * a shift, versus say 10% which would require division.517517- */518518- return max_nsecs - (max_nsecs >> 3);519519+ cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,520520+ cs->maxadj, cs->mask,521521+ &cs->max_cycles);519522}520523521524#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET···643648}644649645650/**646646- * __clocksource_updatefreq_scale - Used update clocksource with new freq651651+ * __clocksource_update_freq_scale - Used update clocksource with new freq647652 * @cs: clocksource to be registered648653 * @scale: Scale factor multiplied against freq to get clocksource hz649654 * @freq: clocksource frequency (cycles per second) divided by scale···651656 * This should only be called from the clocksource->enable() method.652657 *653658 * This *SHOULD NOT* be called directly! Please use the654654- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.659659+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper660660+ * functions.655661 */656656-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)662662+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)657663{658664 u64 sec;665665+659666 /*660660- * Calc the maximum number of seconds which we can run before661661- * wrapping around. For clocksources which have a mask > 32bit662662- * we need to limit the max sleep time to have a good663663- * conversion precision. 10 minutes is still a reasonable664664- * amount. That results in a shift value of 24 for a665665- * clocksource with mask >= 40bit and f >= 4GHz. That maps to666666- * ~ 0.06ppm granularity for NTP. We apply the same 12.5%667667- * margin as we do in clocksource_max_deferment()667667+ * Default clocksources are *special* and self-define their mult/shift.668668+ * But, you're not special, so you should specify a freq value.668669 */669669- sec = (cs->mask - (cs->mask >> 3));670670- do_div(sec, freq);671671- do_div(sec, scale);672672- if (!sec)673673- sec = 1;674674- else if (sec > 600 && cs->mask > UINT_MAX)675675- sec = 600;670670+ if (freq) {671671+ /*672672+ * Calc the maximum number of seconds which we can run before673673+ * wrapping around. For clocksources which have a mask > 32-bit674674+ * we need to limit the max sleep time to have a good675675+ * conversion precision. 10 minutes is still a reasonable676676+ * amount. That results in a shift value of 24 for a677677+ * clocksource with mask >= 40-bit and f >= 4GHz. That maps to678678+ * ~ 0.06ppm granularity for NTP.679679+ */680680+ sec = cs->mask;681681+ do_div(sec, freq);682682+ do_div(sec, scale);683683+ if (!sec)684684+ sec = 1;685685+ else if (sec > 600 && cs->mask > UINT_MAX)686686+ sec = 600;676687677677- clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,678678- NSEC_PER_SEC / scale, sec * scale);679679-688688+ clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,689689+ NSEC_PER_SEC / scale, sec * scale);690690+ }680691 /*681681- * for clocksources that have large mults, to avoid overflow.682682- * Since mult may be adjusted by ntp, add an safety extra margin683683- *692692+ * Ensure clocksources that have large 'mult' values don't overflow693693+ * when adjusted.684694 */685695 cs->maxadj = clocksource_max_adjustment(cs);686686- while ((cs->mult + cs->maxadj < cs->mult)687687- || (cs->mult - cs->maxadj > cs->mult)) {696696+ while (freq && ((cs->mult + cs->maxadj < cs->mult)697697+ || (cs->mult - cs->maxadj > cs->mult))) {688698 cs->mult >>= 1;689699 cs->shift--;690700 cs->maxadj = clocksource_max_adjustment(cs);691701 }692702693693- cs->max_idle_ns = clocksource_max_deferment(cs);703703+ /*704704+ * Only warn for *special* clocksources that self-define705705+ * their mult/shift values and don't specify a freq.706706+ */707707+ WARN_ONCE(cs->mult + cs->maxadj < cs->mult,708708+ "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",709709+ cs->name);710710+711711+ clocksource_update_max_deferment(cs);712712+713713+ pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",714714+ cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);694715}695695-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);716716+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);696717697718/**698719 * __clocksource_register_scale - Used to install new clocksources···725714{726715727716 /* Initialize mult/shift and max_idle_ns */728728- __clocksource_updatefreq_scale(cs, scale, freq);717717+ __clocksource_update_freq_scale(cs, scale, freq);729718730719 /* Add clocksource to the clocksource list */731720 mutex_lock(&clocksource_mutex);···736725 return 0;737726}738727EXPORT_SYMBOL_GPL(__clocksource_register_scale);739739-740740-741741-/**742742- * clocksource_register - Used to install new clocksources743743- * @cs: clocksource to be registered744744- *745745- * Returns -EBUSY if registration fails, zero otherwise.746746- */747747-int clocksource_register(struct clocksource *cs)748748-{749749- /* calculate max adjustment for given mult/shift */750750- cs->maxadj = clocksource_max_adjustment(cs);751751- WARN_ONCE(cs->mult + cs->maxadj < cs->mult,752752- "Clocksource %s might overflow on 11%% adjustment\n",753753- cs->name);754754-755755- /* calculate max idle time permitted for this clocksource */756756- cs->max_idle_ns = clocksource_max_deferment(cs);757757-758758- mutex_lock(&clocksource_mutex);759759- clocksource_enqueue(cs);760760- clocksource_enqueue_watchdog(cs);761761- clocksource_select();762762- mutex_unlock(&clocksource_mutex);763763- return 0;764764-}765765-EXPORT_SYMBOL(clocksource_register);766728767729static void __clocksource_change_rating(struct clocksource *cs, int rating)768730{
···11/*22- * sched_clock.c: support for extending counters to full 64-bit ns counter22+ * sched_clock.c: Generic sched_clock() support, to extend low level33+ * hardware time counters to full 64-bit ns values.34 *45 * This program is free software; you can redistribute it and/or modify56 * it under the terms of the GNU General Public License version 2 as···1918#include <linux/seqlock.h>2019#include <linux/bitops.h>21202222-struct clock_data {2323- ktime_t wrap_kt;2121+/**2222+ * struct clock_read_data - data required to read from sched_clock()2323+ *2424+ * @epoch_ns: sched_clock() value at last update2525+ * @epoch_cyc: Clock cycle value at last update.2626+ * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit2727+ * clocks.2828+ * @read_sched_clock: Current clock source (or dummy source when suspended).2929+ * @mult: Multipler for scaled math conversion.3030+ * @shift: Shift value for scaled math conversion.3131+ *3232+ * Care must be taken when updating this structure; it is read by3333+ * some very hot code paths. It occupies <=40 bytes and, when combined3434+ * with the seqcount used to synchronize access, comfortably fits into3535+ * a 64 byte cache line.3636+ */3737+struct clock_read_data {2438 u64 epoch_ns;2539 u64 epoch_cyc;2626- seqcount_t seq;2727- unsigned long rate;4040+ u64 sched_clock_mask;4141+ u64 (*read_sched_clock)(void);2842 u32 mult;2943 u32 shift;3030- bool suspended;4444+};4545+4646+/**4747+ * struct clock_data - all data needed for sched_clock() (including4848+ * registration of a new clock source)4949+ *5050+ * @seq: Sequence counter for protecting updates. The lowest5151+ * bit is the index for @read_data.5252+ * @read_data: Data required to read from sched_clock.5353+ * @wrap_kt: Duration for which clock can run before wrapping.5454+ * @rate: Tick rate of the registered clock.5555+ * @actual_read_sched_clock: Registered hardware level clock read function.5656+ *5757+ * The ordering of this structure has been chosen to optimize cache5858+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit5959+ * into a single 64-byte cache line.6060+ */6161+struct clock_data {6262+ seqcount_t seq;6363+ struct clock_read_data read_data[2];6464+ ktime_t wrap_kt;6565+ unsigned long rate;6666+6767+ u64 (*actual_read_sched_clock)(void);3168};32693370static struct hrtimer sched_clock_timer;3471static int irqtime = -1;35723673core_param(irqtime, irqtime, int, 0400);3737-3838-static struct clock_data cd = {3939- .mult = NSEC_PER_SEC / HZ,4040-};4141-4242-static u64 __read_mostly sched_clock_mask;43744475static u64 notrace jiffy_sched_clock_read(void)4576{···8249 return (u64)(jiffies - INITIAL_JIFFIES);8350}84518585-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;5252+static struct clock_data cd ____cacheline_aligned = {5353+ .read_data[0] = { .mult = NSEC_PER_SEC / HZ,5454+ .read_sched_clock = jiffy_sched_clock_read, },5555+ .actual_read_sched_clock = jiffy_sched_clock_read,5656+};86578758static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)8859{···95589659unsigned long long notrace sched_clock(void)9760{9898- u64 epoch_ns;9999- u64 epoch_cyc;100100- u64 cyc;6161+ u64 cyc, res;10162 unsigned long seq;102102-103103- if (cd.suspended)104104- return cd.epoch_ns;6363+ struct clock_read_data *rd;1056410665 do {107107- seq = raw_read_seqcount_begin(&cd.seq);108108- epoch_cyc = cd.epoch_cyc;109109- epoch_ns = cd.epoch_ns;6666+ seq = raw_read_seqcount(&cd.seq);6767+ rd = cd.read_data + (seq & 1);6868+6969+ cyc = (rd->read_sched_clock() - rd->epoch_cyc) &7070+ rd->sched_clock_mask;7171+ res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);11072 } while (read_seqcount_retry(&cd.seq, seq));11173112112- cyc = read_sched_clock();113113- cyc = (cyc - epoch_cyc) & sched_clock_mask;114114- return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);7474+ return res;11575}1167611777/*118118- * Atomically update the sched_clock epoch.7878+ * Updating the data required to read the clock.7979+ *8080+ * sched_clock() will never observe mis-matched data even if called from8181+ * an NMI. We do this by maintaining an odd/even copy of the data and8282+ * steering sched_clock() to one or the other using a sequence counter.8383+ * In order to preserve the data cache profile of sched_clock() as much8484+ * as possible the system reverts back to the even copy when the update8585+ * completes; the odd copy is used *only* during an update.11986 */120120-static void notrace update_sched_clock(void)8787+static void update_clock_read_data(struct clock_read_data *rd)12188{122122- unsigned long flags;8989+ /* update the backup (odd) copy with the new data */9090+ cd.read_data[1] = *rd;9191+9292+ /* steer readers towards the odd copy */9393+ raw_write_seqcount_latch(&cd.seq);9494+9595+ /* now its safe for us to update the normal (even) copy */9696+ cd.read_data[0] = *rd;9797+9898+ /* switch readers back to the even copy */9999+ raw_write_seqcount_latch(&cd.seq);100100+}101101+102102+/*103103+ * Atomically update the sched_clock() epoch.104104+ */105105+static void update_sched_clock(void)106106+{123107 u64 cyc;124108 u64 ns;109109+ struct clock_read_data rd;125110126126- cyc = read_sched_clock();127127- ns = cd.epoch_ns +128128- cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,129129- cd.mult, cd.shift);111111+ rd = cd.read_data[0];130112131131- raw_local_irq_save(flags);132132- raw_write_seqcount_begin(&cd.seq);133133- cd.epoch_ns = ns;134134- cd.epoch_cyc = cyc;135135- raw_write_seqcount_end(&cd.seq);136136- raw_local_irq_restore(flags);113113+ cyc = cd.actual_read_sched_clock();114114+ ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);115115+116116+ rd.epoch_ns = ns;117117+ rd.epoch_cyc = cyc;118118+119119+ update_clock_read_data(&rd);137120}138121139122static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)140123{141124 update_sched_clock();142125 hrtimer_forward_now(hrt, cd.wrap_kt);126126+143127 return HRTIMER_RESTART;144128}145129146146-void __init sched_clock_register(u64 (*read)(void), int bits,147147- unsigned long rate)130130+void __init131131+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)148132{149133 u64 res, wrap, new_mask, new_epoch, cyc, ns;150134 u32 new_mult, new_shift;151151- ktime_t new_wrap_kt;152135 unsigned long r;153136 char r_unit;137137+ struct clock_read_data rd;154138155139 if (cd.rate > rate)156140 return;157141158142 WARN_ON(!irqs_disabled());159143160160- /* calculate the mult/shift to convert counter ticks to ns. */144144+ /* Calculate the mult/shift to convert counter ticks to ns. */161145 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);162146163147 new_mask = CLOCKSOURCE_MASK(bits);164164-165165- /* calculate how many ns until we wrap */166166- wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);167167- new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));168168-169169- /* update epoch for new counter and update epoch_ns from old counter*/170170- new_epoch = read();171171- cyc = read_sched_clock();172172- ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,173173- cd.mult, cd.shift);174174-175175- raw_write_seqcount_begin(&cd.seq);176176- read_sched_clock = read;177177- sched_clock_mask = new_mask;178148 cd.rate = rate;179179- cd.wrap_kt = new_wrap_kt;180180- cd.mult = new_mult;181181- cd.shift = new_shift;182182- cd.epoch_cyc = new_epoch;183183- cd.epoch_ns = ns;184184- raw_write_seqcount_end(&cd.seq);149149+150150+ /* Calculate how many nanosecs until we risk wrapping */151151+ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);152152+ cd.wrap_kt = ns_to_ktime(wrap);153153+154154+ rd = cd.read_data[0];155155+156156+ /* Update epoch for new counter and update 'epoch_ns' from old counter*/157157+ new_epoch = read();158158+ cyc = cd.actual_read_sched_clock();159159+ ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);160160+ cd.actual_read_sched_clock = read;161161+162162+ rd.read_sched_clock = read;163163+ rd.sched_clock_mask = new_mask;164164+ rd.mult = new_mult;165165+ rd.shift = new_shift;166166+ rd.epoch_cyc = new_epoch;167167+ rd.epoch_ns = ns;168168+169169+ update_clock_read_data(&rd);185170186171 r = rate;187172 if (r >= 4000000) {188173 r /= 1000000;189174 r_unit = 'M';190190- } else if (r >= 1000) {191191- r /= 1000;192192- r_unit = 'k';193193- } else194194- r_unit = ' ';175175+ } else {176176+ if (r >= 1000) {177177+ r /= 1000;178178+ r_unit = 'k';179179+ } else {180180+ r_unit = ' ';181181+ }182182+ }195183196196- /* calculate the ns resolution of this counter */184184+ /* Calculate the ns resolution of this counter */197185 res = cyc_to_ns(1ULL, new_mult, new_shift);198186199187 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",200188 bits, r, r_unit, res, wrap);201189202202- /* Enable IRQ time accounting if we have a fast enough sched_clock */190190+ /* Enable IRQ time accounting if we have a fast enough sched_clock() */203191 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))204192 enable_sched_clock_irqtime();205193···234172void __init sched_clock_postinit(void)235173{236174 /*237237- * If no sched_clock function has been provided at that point,175175+ * If no sched_clock() function has been provided at that point,238176 * make it the final one one.239177 */240240- if (read_sched_clock == jiffy_sched_clock_read)178178+ if (cd.actual_read_sched_clock == jiffy_sched_clock_read)241179 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);242180243181 update_sched_clock();···251189 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);252190}253191192192+/*193193+ * Clock read function for use when the clock is suspended.194194+ *195195+ * This function makes it appear to sched_clock() as if the clock196196+ * stopped counting at its last update.197197+ *198198+ * This function must only be called from the critical199199+ * section in sched_clock(). It relies on the read_seqcount_retry()200200+ * at the end of the critical section to be sure we observe the201201+ * correct copy of 'epoch_cyc'.202202+ */203203+static u64 notrace suspended_sched_clock_read(void)204204+{205205+ unsigned long seq = raw_read_seqcount(&cd.seq);206206+207207+ return cd.read_data[seq & 1].epoch_cyc;208208+}209209+254210static int sched_clock_suspend(void)255211{212212+ struct clock_read_data *rd = &cd.read_data[0];213213+256214 update_sched_clock();257215 hrtimer_cancel(&sched_clock_timer);258258- cd.suspended = true;216216+ rd->read_sched_clock = suspended_sched_clock_read;217217+259218 return 0;260219}261220262221static void sched_clock_resume(void)263222{264264- cd.epoch_cyc = read_sched_clock();223223+ struct clock_read_data *rd = &cd.read_data[0];224224+225225+ rd->epoch_cyc = cd.actual_read_sched_clock();265226 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);266266- cd.suspended = false;227227+ rd->read_sched_clock = cd.actual_read_sched_clock;267228}268229269230static struct syscore_ops sched_clock_ops = {270270- .suspend = sched_clock_suspend,271271- .resume = sched_clock_resume,231231+ .suspend = sched_clock_suspend,232232+ .resume = sched_clock_resume,272233};273234274235static int __init sched_clock_syscore_init(void)275236{276237 register_syscore_ops(&sched_clock_ops);238238+277239 return 0;278240}279241device_initcall(sched_clock_syscore_init);
+233-112
kernel/time/timekeeping.c
···5959};60606161static struct tk_fast tk_fast_mono ____cacheline_aligned;6262+static struct tk_fast tk_fast_raw ____cacheline_aligned;62636364/* flag for if timekeeping is suspended */6465int __read_mostly timekeeping_suspended;···69687069static inline void tk_normalize_xtime(struct timekeeper *tk)7170{7272- while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {7373- tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;7171+ while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {7272+ tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;7473 tk->xtime_sec++;7574 }7675}···8079 struct timespec64 ts;81808281 ts.tv_sec = tk->xtime_sec;8383- ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);8282+ ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);8483 return ts;8584}86858786static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)8887{8988 tk->xtime_sec = ts->tv_sec;9090- tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;8989+ tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;9190}92919392static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)9493{9594 tk->xtime_sec += ts->tv_sec;9696- tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;9595+ tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;9796 tk_normalize_xtime(tk);9897}9998···119118 tk->offs_boot = ktime_add(tk->offs_boot, delta);120119}121120121121+#ifdef CONFIG_DEBUG_TIMEKEEPING122122+#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */123123+/*124124+ * These simple flag variables are managed125125+ * without locks, which is racy, but ok since126126+ * we don't really care about being super127127+ * precise about how many events were seen,128128+ * just that a problem was observed.129129+ */130130+static int timekeeping_underflow_seen;131131+static int timekeeping_overflow_seen;132132+133133+/* last_warning is only modified under the timekeeping lock */134134+static long timekeeping_last_warning;135135+136136+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)137137+{138138+139139+ cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;140140+ const char *name = tk->tkr_mono.clock->name;141141+142142+ if (offset > max_cycles) {143143+ printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",144144+ offset, name, max_cycles);145145+ printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");146146+ } else {147147+ if (offset > (max_cycles >> 1)) {148148+ printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",149149+ offset, name, max_cycles >> 1);150150+ printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");151151+ }152152+ }153153+154154+ if (timekeeping_underflow_seen) {155155+ if (jiffies - timekeeping_last_warning > WARNING_FREQ) {156156+ printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);157157+ printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");158158+ printk_deferred(" Your kernel is probably still fine.\n");159159+ timekeeping_last_warning = jiffies;160160+ }161161+ timekeeping_underflow_seen = 0;162162+ }163163+164164+ if (timekeeping_overflow_seen) {165165+ if (jiffies - timekeeping_last_warning > WARNING_FREQ) {166166+ printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);167167+ printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");168168+ printk_deferred(" Your kernel is probably still fine.\n");169169+ timekeeping_last_warning = jiffies;170170+ }171171+ timekeeping_overflow_seen = 0;172172+ }173173+}174174+175175+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)176176+{177177+ cycle_t now, last, mask, max, delta;178178+ unsigned int seq;179179+180180+ /*181181+ * Since we're called holding a seqlock, the data may shift182182+ * under us while we're doing the calculation. This can cause183183+ * false positives, since we'd note a problem but throw the184184+ * results away. So nest another seqlock here to atomically185185+ * grab the points we are checking with.186186+ */187187+ do {188188+ seq = read_seqcount_begin(&tk_core.seq);189189+ now = tkr->read(tkr->clock);190190+ last = tkr->cycle_last;191191+ mask = tkr->mask;192192+ max = tkr->clock->max_cycles;193193+ } while (read_seqcount_retry(&tk_core.seq, seq));194194+195195+ delta = clocksource_delta(now, last, mask);196196+197197+ /*198198+ * Try to catch underflows by checking if we are seeing small199199+ * mask-relative negative values.200200+ */201201+ if (unlikely((~delta & mask) < (mask >> 3))) {202202+ timekeeping_underflow_seen = 1;203203+ delta = 0;204204+ }205205+206206+ /* Cap delta value to the max_cycles values to avoid mult overflows */207207+ if (unlikely(delta > max)) {208208+ timekeeping_overflow_seen = 1;209209+ delta = tkr->clock->max_cycles;210210+ }211211+212212+ return delta;213213+}214214+#else215215+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)216216+{217217+}218218+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)219219+{220220+ cycle_t cycle_now, delta;221221+222222+ /* read clocksource */223223+ cycle_now = tkr->read(tkr->clock);224224+225225+ /* calculate the delta since the last update_wall_time */226226+ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);227227+228228+ return delta;229229+}230230+#endif231231+122232/**123233 * tk_setup_internals - Set up internals to use clocksource clock.124234 *···247135 u64 tmp, ntpinterval;248136 struct clocksource *old_clock;249137250250- old_clock = tk->tkr.clock;251251- tk->tkr.clock = clock;252252- tk->tkr.read = clock->read;253253- tk->tkr.mask = clock->mask;254254- tk->tkr.cycle_last = tk->tkr.read(clock);138138+ old_clock = tk->tkr_mono.clock;139139+ tk->tkr_mono.clock = clock;140140+ tk->tkr_mono.read = clock->read;141141+ tk->tkr_mono.mask = clock->mask;142142+ tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);143143+144144+ tk->tkr_raw.clock = clock;145145+ tk->tkr_raw.read = clock->read;146146+ tk->tkr_raw.mask = clock->mask;147147+ tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;255148256149 /* Do the ns -> cycle conversion first, using original mult */257150 tmp = NTP_INTERVAL_LENGTH;···280163 if (old_clock) {281164 int shift_change = clock->shift - old_clock->shift;282165 if (shift_change < 0)283283- tk->tkr.xtime_nsec >>= -shift_change;166166+ tk->tkr_mono.xtime_nsec >>= -shift_change;284167 else285285- tk->tkr.xtime_nsec <<= shift_change;168168+ tk->tkr_mono.xtime_nsec <<= shift_change;286169 }287287- tk->tkr.shift = clock->shift;170170+ tk->tkr_raw.xtime_nsec = 0;171171+172172+ tk->tkr_mono.shift = clock->shift;173173+ tk->tkr_raw.shift = clock->shift;288174289175 tk->ntp_error = 0;290176 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;···298178 * active clocksource. These value will be adjusted via NTP299179 * to counteract clock drifting.300180 */301301- tk->tkr.mult = clock->mult;181181+ tk->tkr_mono.mult = clock->mult;182182+ tk->tkr_raw.mult = clock->mult;302183 tk->ntp_err_mult = 0;303184}304185···314193315194static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)316195{317317- cycle_t cycle_now, delta;196196+ cycle_t delta;318197 s64 nsec;319198320320- /* read clocksource: */321321- cycle_now = tkr->read(tkr->clock);322322-323323- /* calculate the delta since the last update_wall_time: */324324- delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);199199+ delta = timekeeping_get_delta(tkr);325200326201 nsec = delta * tkr->mult + tkr->xtime_nsec;327202 nsec >>= tkr->shift;328328-329329- /* If arch requires, add in get_arch_timeoffset() */330330- return nsec + arch_gettimeoffset();331331-}332332-333333-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)334334-{335335- struct clocksource *clock = tk->tkr.clock;336336- cycle_t cycle_now, delta;337337- s64 nsec;338338-339339- /* read clocksource: */340340- cycle_now = tk->tkr.read(clock);341341-342342- /* calculate the delta since the last update_wall_time: */343343- delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);344344-345345- /* convert delta to nanoseconds. */346346- nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);347203348204 /* If arch requires, add in get_arch_timeoffset() */349205 return nsec + arch_gettimeoffset();···365267 * slightly wrong timestamp (a few nanoseconds). See366268 * @ktime_get_mono_fast_ns.367269 */368368-static void update_fast_timekeeper(struct tk_read_base *tkr)270270+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)369271{370370- struct tk_read_base *base = tk_fast_mono.base;272272+ struct tk_read_base *base = tkf->base;371273372274 /* Force readers off to base[1] */373373- raw_write_seqcount_latch(&tk_fast_mono.seq);275275+ raw_write_seqcount_latch(&tkf->seq);374276375277 /* Update base[0] */376278 memcpy(base, tkr, sizeof(*base));377279378280 /* Force readers back to base[0] */379379- raw_write_seqcount_latch(&tk_fast_mono.seq);281281+ raw_write_seqcount_latch(&tkf->seq);380282381283 /* Update base[1] */382284 memcpy(base + 1, base, sizeof(*base));···414316 * of the following timestamps. Callers need to be aware of that and415317 * deal with it.416318 */417417-u64 notrace ktime_get_mono_fast_ns(void)319319+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)418320{419321 struct tk_read_base *tkr;420322 unsigned int seq;421323 u64 now;422324423325 do {424424- seq = raw_read_seqcount(&tk_fast_mono.seq);425425- tkr = tk_fast_mono.base + (seq & 0x01);426426- now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);326326+ seq = raw_read_seqcount(&tkf->seq);327327+ tkr = tkf->base + (seq & 0x01);328328+ now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);329329+ } while (read_seqcount_retry(&tkf->seq, seq));427330428428- } while (read_seqcount_retry(&tk_fast_mono.seq, seq));429331 return now;430332}333333+334334+u64 ktime_get_mono_fast_ns(void)335335+{336336+ return __ktime_get_fast_ns(&tk_fast_mono);337337+}431338EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);339339+340340+u64 ktime_get_raw_fast_ns(void)341341+{342342+ return __ktime_get_fast_ns(&tk_fast_raw);343343+}344344+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);432345433346/* Suspend-time cycles value for halted fast timekeeper. */434347static cycle_t cycles_at_suspend;···462353static void halt_fast_timekeeper(struct timekeeper *tk)463354{464355 static struct tk_read_base tkr_dummy;465465- struct tk_read_base *tkr = &tk->tkr;356356+ struct tk_read_base *tkr = &tk->tkr_mono;466357467358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));468359 cycles_at_suspend = tkr->read(tkr->clock);469360 tkr_dummy.read = dummy_clock_read;470470- update_fast_timekeeper(&tkr_dummy);361361+ update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);362362+363363+ tkr = &tk->tkr_raw;364364+ memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));365365+ tkr_dummy.read = dummy_clock_read;366366+ update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);471367}472368473369#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD···483369484370 xt = timespec64_to_timespec(tk_xtime(tk));485371 wm = timespec64_to_timespec(tk->wall_to_monotonic);486486- update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,487487- tk->tkr.cycle_last);372372+ update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,373373+ tk->tkr_mono.cycle_last);488374}489375490376static inline void old_vsyscall_fixup(struct timekeeper *tk)···501387 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD502388 * users are removed, this can be killed.503389 */504504- remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);505505- tk->tkr.xtime_nsec -= remainder;506506- tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;390390+ remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);391391+ tk->tkr_mono.xtime_nsec -= remainder;392392+ tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;507393 tk->ntp_error += remainder << tk->ntp_error_shift;508508- tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;394394+ tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;509395}510396#else511397#define old_vsyscall_fixup(tk)···570456 */571457 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);572458 nsec = (u32) tk->wall_to_monotonic.tv_nsec;573573- tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);459459+ tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);574460575461 /* Update the monotonic raw base */576576- tk->base_raw = timespec64_to_ktime(tk->raw_time);462462+ tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);577463578464 /*579465 * The sum of the nanoseconds portions of xtime and580466 * wall_to_monotonic can be greater/equal one second. Take581467 * this into account before updating tk->ktime_sec.582468 */583583- nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);469469+ nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);584470 if (nsec >= NSEC_PER_SEC)585471 seconds++;586472 tk->ktime_sec = seconds;···603489 memcpy(&shadow_timekeeper, &tk_core.timekeeper,604490 sizeof(tk_core.timekeeper));605491606606- update_fast_timekeeper(&tk->tkr);492492+ update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);493493+ update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);607494}608495609496/**···616501 */617502static void timekeeping_forward_now(struct timekeeper *tk)618503{619619- struct clocksource *clock = tk->tkr.clock;504504+ struct clocksource *clock = tk->tkr_mono.clock;620505 cycle_t cycle_now, delta;621506 s64 nsec;622507623623- cycle_now = tk->tkr.read(clock);624624- delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);625625- tk->tkr.cycle_last = cycle_now;508508+ cycle_now = tk->tkr_mono.read(clock);509509+ delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);510510+ tk->tkr_mono.cycle_last = cycle_now;511511+ tk->tkr_raw.cycle_last = cycle_now;626512627627- tk->tkr.xtime_nsec += delta * tk->tkr.mult;513513+ tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;628514629515 /* If arch requires, add in get_arch_timeoffset() */630630- tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;516516+ tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;631517632518 tk_normalize_xtime(tk);633519634634- nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);520520+ nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);635521 timespec64_add_ns(&tk->raw_time, nsec);636522}637523···653537 seq = read_seqcount_begin(&tk_core.seq);654538655539 ts->tv_sec = tk->xtime_sec;656656- nsecs = timekeeping_get_ns(&tk->tkr);540540+ nsecs = timekeeping_get_ns(&tk->tkr_mono);657541658542 } while (read_seqcount_retry(&tk_core.seq, seq));659543···693577694578 do {695579 seq = read_seqcount_begin(&tk_core.seq);696696- base = tk->tkr.base_mono;697697- nsecs = timekeeping_get_ns(&tk->tkr);580580+ base = tk->tkr_mono.base;581581+ nsecs = timekeeping_get_ns(&tk->tkr_mono);698582699583 } while (read_seqcount_retry(&tk_core.seq, seq));700584···719603720604 do {721605 seq = read_seqcount_begin(&tk_core.seq);722722- base = ktime_add(tk->tkr.base_mono, *offset);723723- nsecs = timekeeping_get_ns(&tk->tkr);606606+ base = ktime_add(tk->tkr_mono.base, *offset);607607+ nsecs = timekeeping_get_ns(&tk->tkr_mono);724608725609 } while (read_seqcount_retry(&tk_core.seq, seq));726610···761645762646 do {763647 seq = read_seqcount_begin(&tk_core.seq);764764- base = tk->base_raw;765765- nsecs = timekeeping_get_ns_raw(tk);648648+ base = tk->tkr_raw.base;649649+ nsecs = timekeeping_get_ns(&tk->tkr_raw);766650767651 } while (read_seqcount_retry(&tk_core.seq, seq));768652···790674 do {791675 seq = read_seqcount_begin(&tk_core.seq);792676 ts->tv_sec = tk->xtime_sec;793793- nsec = timekeeping_get_ns(&tk->tkr);677677+ nsec = timekeeping_get_ns(&tk->tkr_mono);794678 tomono = tk->wall_to_monotonic;795679796680 } while (read_seqcount_retry(&tk_core.seq, seq));···875759 ts_real->tv_sec = tk->xtime_sec;876760 ts_real->tv_nsec = 0;877761878878- nsecs_raw = timekeeping_get_ns_raw(tk);879879- nsecs_real = timekeeping_get_ns(&tk->tkr);762762+ nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);763763+ nsecs_real = timekeeping_get_ns(&tk->tkr_mono);880764881765 } while (read_seqcount_retry(&tk_core.seq, seq));882766···1059943 */1060944 if (try_module_get(new->owner)) {1061945 if (!new->enable || new->enable(new) == 0) {10621062- old = tk->tkr.clock;946946+ old = tk->tkr_mono.clock;1063947 tk_setup_internals(tk, new);1064948 if (old->disable)1065949 old->disable(old);···1087971{1088972 struct timekeeper *tk = &tk_core.timekeeper;108997310901090- if (tk->tkr.clock == clock)974974+ if (tk->tkr_mono.clock == clock)1091975 return 0;1092976 stop_machine(change_clocksource, clock, NULL);1093977 tick_clock_notify();10941094- return tk->tkr.clock == clock ? 0 : -1;978978+ return tk->tkr_mono.clock == clock ? 0 : -1;1095979}10969801097981/**···11099931110994 do {1111995 seq = read_seqcount_begin(&tk_core.seq);11121112- nsecs = timekeeping_get_ns_raw(tk);996996+ nsecs = timekeeping_get_ns(&tk->tkr_raw);1113997 ts64 = tk->raw_time;11149981115999 } while (read_seqcount_retry(&tk_core.seq, seq));···11321016 do {11331017 seq = read_seqcount_begin(&tk_core.seq);1134101811351135- ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;10191019+ ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;1136102011371021 } while (read_seqcount_retry(&tk_core.seq, seq));11381022···11511035 do {11521036 seq = read_seqcount_begin(&tk_core.seq);1153103711541154- ret = tk->tkr.clock->max_idle_ns;10381038+ ret = tk->tkr_mono.clock->max_idle_ns;1155103911561040 } while (read_seqcount_retry(&tk_core.seq, seq));11571041···12301114 tk_set_xtime(tk, &now);12311115 tk->raw_time.tv_sec = 0;12321116 tk->raw_time.tv_nsec = 0;12331233- tk->base_raw.tv64 = 0;12341117 if (boot.tv_sec == 0 && boot.tv_nsec == 0)12351118 boot = tk_xtime(tk);12361119···13151200void timekeeping_resume(void)13161201{13171202 struct timekeeper *tk = &tk_core.timekeeper;13181318- struct clocksource *clock = tk->tkr.clock;12031203+ struct clocksource *clock = tk->tkr_mono.clock;13191204 unsigned long flags;13201205 struct timespec64 ts_new, ts_delta;13211206 struct timespec tmp;···13431228 * The less preferred source will only be tried if there is no better13441229 * usable source. The rtc part is handled separately in rtc core code.13451230 */13461346- cycle_now = tk->tkr.read(clock);12311231+ cycle_now = tk->tkr_mono.read(clock);13471232 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&13481348- cycle_now > tk->tkr.cycle_last) {12331233+ cycle_now > tk->tkr_mono.cycle_last) {13491234 u64 num, max = ULLONG_MAX;13501235 u32 mult = clock->mult;13511236 u32 shift = clock->shift;13521237 s64 nsec = 0;1353123813541354- cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,13551355- tk->tkr.mask);12391239+ cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,12401240+ tk->tkr_mono.mask);1356124113571242 /*13581243 * "cycle_delta * mutl" may cause 64 bits overflow, if the···13781263 __timekeeping_inject_sleeptime(tk, &ts_delta);1379126413801265 /* Re-base the last cycle value */13811381- tk->tkr.cycle_last = cycle_now;12661266+ tk->tkr_mono.cycle_last = cycle_now;12671267+ tk->tkr_raw.cycle_last = cycle_now;12681268+13821269 tk->ntp_error = 0;13831270 timekeeping_suspended = 0;13841271 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);···15331416 *15341417 * XXX - TODO: Doc ntp_error calculation.15351418 */15361536- if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {14191419+ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {15371420 /* NTP adjustment caused clocksource mult overflow */15381421 WARN_ON_ONCE(1);15391422 return;15401423 }1541142415421542- tk->tkr.mult += mult_adj;14251425+ tk->tkr_mono.mult += mult_adj;15431426 tk->xtime_interval += interval;15441544- tk->tkr.xtime_nsec -= offset;14271427+ tk->tkr_mono.xtime_nsec -= offset;15451428 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;15461429}15471430···16031486 tk->ntp_err_mult = 0;16041487 }1605148816061606- if (unlikely(tk->tkr.clock->maxadj &&16071607- (abs(tk->tkr.mult - tk->tkr.clock->mult)16081608- > tk->tkr.clock->maxadj))) {14891489+ if (unlikely(tk->tkr_mono.clock->maxadj &&14901490+ (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)14911491+ > tk->tkr_mono.clock->maxadj))) {16091492 printk_once(KERN_WARNING16101493 "Adjusting %s more than 11%% (%ld vs %ld)\n",16111611- tk->tkr.clock->name, (long)tk->tkr.mult,16121612- (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);14941494+ tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,14951495+ (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);16131496 }1614149716151498 /*···16261509 * We'll correct this error next time through this function, when16271510 * xtime_nsec is not as small.16281511 */16291629- if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {16301630- s64 neg = -(s64)tk->tkr.xtime_nsec;16311631- tk->tkr.xtime_nsec = 0;15121512+ if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {15131513+ s64 neg = -(s64)tk->tkr_mono.xtime_nsec;15141514+ tk->tkr_mono.xtime_nsec = 0;16321515 tk->ntp_error += neg << tk->ntp_error_shift;16331516 }16341517}···16431526 */16441527static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)16451528{16461646- u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;15291529+ u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;16471530 unsigned int clock_set = 0;1648153116491649- while (tk->tkr.xtime_nsec >= nsecps) {15321532+ while (tk->tkr_mono.xtime_nsec >= nsecps) {16501533 int leap;1651153416521652- tk->tkr.xtime_nsec -= nsecps;15351535+ tk->tkr_mono.xtime_nsec -= nsecps;16531536 tk->xtime_sec++;1654153716551538 /* Figure out if its a leap sec and apply if needed */···1694157716951578 /* Accumulate one shifted interval */16961579 offset -= interval;16971697- tk->tkr.cycle_last += interval;15801580+ tk->tkr_mono.cycle_last += interval;15811581+ tk->tkr_raw.cycle_last += interval;1698158216991699- tk->tkr.xtime_nsec += tk->xtime_interval << shift;15831583+ tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;17001584 *clock_set |= accumulate_nsecs_to_secs(tk);1701158517021586 /* Accumulate raw time */···17401622#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET17411623 offset = real_tk->cycle_interval;17421624#else17431743- offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),17441744- tk->tkr.cycle_last, tk->tkr.mask);16251625+ offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),16261626+ tk->tkr_mono.cycle_last, tk->tkr_mono.mask);17451627#endif1746162817471629 /* Check if there's really nothing to do */17481630 if (offset < real_tk->cycle_interval)17491631 goto out;16321632+16331633+ /* Do some additional sanity checking */16341634+ timekeeping_check_update(real_tk, offset);1750163517511636 /*17521637 * With NO_HZ we may have to accumulate many cycle_intervals···19051784 do {19061785 seq = read_seqcount_begin(&tk_core.seq);1907178619081908- base = tk->tkr.base_mono;19091909- nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;17871787+ base = tk->tkr_mono.base;17881788+ nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;1910178919111790 *offs_real = tk->offs_real;19121791 *offs_boot = tk->offs_boot;···19371816 do {19381817 seq = read_seqcount_begin(&tk_core.seq);1939181819401940- base = tk->tkr.base_mono;19411941- nsecs = timekeeping_get_ns(&tk->tkr);18191819+ base = tk->tkr_mono.base;18201820+ nsecs = timekeeping_get_ns(&tk->tkr_mono);1942182119431822 *offs_real = tk->offs_real;19441823 *offs_boot = tk->offs_boot;
···865865 data corruption or a sporadic crash at a later stage once the region866866 is examined. The runtime overhead introduced is minimal.867867868868+config DEBUG_TIMEKEEPING869869+ bool "Enable extra timekeeping sanity checking"870870+ help871871+ This option will enable additional timekeeping sanity checks872872+ which may be helpful when diagnosing issues where timekeeping873873+ problems are suspected.874874+875875+ This may include checks in the timekeeping hotpaths, so this876876+ option may have a (very small) performance impact to some877877+ workloads.878878+879879+ If unsure, say N.880880+868881config TIMER_STATS869882 bool "Collect kernel timers statistics"870883 depends on DEBUG_KERNEL && PROC_FS
+13-13
mm/huge_memory.c
···12601260 int target_nid, last_cpupid = -1;12611261 bool page_locked;12621262 bool migrated = false;12631263+ bool was_writable;12631264 int flags = 0;1264126512651266 /* A PROT_NONE fault should not end up here */···12921291 flags |= TNF_FAULT_LOCAL;12931292 }1294129312951295- /*12961296- * Avoid grouping on DSO/COW pages in specific and RO pages12971297- * in general, RO pages shouldn't hurt as much anyway since12981298- * they can be in shared cache state.12991299- *13001300- * FIXME! This checks "pmd_dirty()" as an approximation of13011301- * "is this a read-only page", since checking "pmd_write()"13021302- * is even more broken. We haven't actually turned this into13031303- * a writable page, so pmd_write() will always be false.13041304- */13051305- if (!pmd_dirty(pmd))12941294+ /* See similar comment in do_numa_page for explanation */12951295+ if (!(vma->vm_flags & VM_WRITE))13061296 flags |= TNF_NO_GROUP;1307129713081298 /*···13501358 if (migrated) {13511359 flags |= TNF_MIGRATED;13521360 page_nid = target_nid;13531353- }13611361+ } else13621362+ flags |= TNF_MIGRATE_FAIL;1354136313551364 goto out;13561365clear_pmdnuma:13571366 BUG_ON(!PageLocked(page));13671367+ was_writable = pmd_write(pmd);13581368 pmd = pmd_modify(pmd, vma->vm_page_prot);13691369+ pmd = pmd_mkyoung(pmd);13701370+ if (was_writable)13711371+ pmd = pmd_mkwrite(pmd);13591372 set_pmd_at(mm, haddr, pmdp, pmd);13601373 update_mmu_cache_pmd(vma, addr, pmdp);13611374 unlock_page(page);···1484148714851488 if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {14861489 pmd_t entry;14901490+ bool preserve_write = prot_numa && pmd_write(*pmd);14871491 ret = 1;1488149214891493 /*···15001502 if (!prot_numa || !pmd_protnone(*pmd)) {15011503 entry = pmdp_get_and_clear_notify(mm, addr, pmd);15021504 entry = pmd_modify(entry, newprot);15051505+ if (preserve_write)15061506+ entry = pmd_mkwrite(entry);15031507 ret = HPAGE_PMD_NR;15041508 set_pmd_at(mm, addr, pmd, entry);15051505- BUG_ON(pmd_write(entry));15091509+ BUG_ON(!preserve_write && pmd_write(entry));15061510 }15071511 spin_unlock(ptl);15081512 }
+12-10
mm/memory.c
···30353035 int last_cpupid;30363036 int target_nid;30373037 bool migrated = false;30383038+ bool was_writable = pte_write(pte);30383039 int flags = 0;3039304030403041 /* A PROT_NONE fault should not end up here */···30603059 /* Make it present again */30613060 pte = pte_modify(pte, vma->vm_page_prot);30623061 pte = pte_mkyoung(pte);30623062+ if (was_writable)30633063+ pte = pte_mkwrite(pte);30633064 set_pte_at(mm, addr, ptep, pte);30643065 update_mmu_cache(vma, addr, ptep);30653066···30723069 }3073307030743071 /*30753075- * Avoid grouping on DSO/COW pages in specific and RO pages30763076- * in general, RO pages shouldn't hurt as much anyway since30773077- * they can be in shared cache state.30783078- *30793079- * FIXME! This checks "pmd_dirty()" as an approximation of30803080- * "is this a read-only page", since checking "pmd_write()"30813081- * is even more broken. We haven't actually turned this into30823082- * a writable page, so pmd_write() will always be false.30723072+ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as30733073+ * much anyway since they can be in shared cache state. This misses30743074+ * the case where a mapping is writable but the process never writes30753075+ * to it but pte_write gets cleared during protection updates and30763076+ * pte_dirty has unpredictable behaviour between PTE scan updates,30773077+ * background writeback, dirty balancing and application behaviour.30833078 */30843084- if (!pte_dirty(pte))30793079+ if (!(vma->vm_flags & VM_WRITE))30853080 flags |= TNF_NO_GROUP;3086308130873082 /*···31033102 if (migrated) {31043103 page_nid = target_nid;31053104 flags |= TNF_MIGRATED;31063106- }31053105+ } else31063106+ flags |= TNF_MIGRATE_FAIL;3107310731083108out:31093109 if (page_nid != -1)
+4-9
mm/memory_hotplug.c
···10921092 return NULL;1093109310941094 arch_refresh_nodedata(nid, pgdat);10951095+ } else {10961096+ /* Reset the nr_zones and classzone_idx to 0 before reuse */10971097+ pgdat->nr_zones = 0;10981098+ pgdat->classzone_idx = 0;10951099 }1096110010971101 /* we can use NODE_DATA(nid) from here */···19811977 if (is_vmalloc_addr(zone->wait_table))19821978 vfree(zone->wait_table);19831979 }19841984-19851985- /*19861986- * Since there is no way to guarentee the address of pgdat/zone is not19871987- * on stack of any kernel threads or used by other kernel objects19881988- * without reference counting or other symchronizing method, do not19891989- * reset node_data and free pgdat here. Just reset it to 0 and reuse19901990- * the memory when the node is online again.19911991- */19921992- memset(pgdat, 0, sizeof(*pgdat));19931980}19941981EXPORT_SYMBOL(try_offline_node);19951982
···265265 vma = vma->vm_next;266266267267 err = walk_page_test(start, next, walk);268268- if (err > 0)268268+ if (err > 0) {269269+ /*270270+ * positive return values are purely for271271+ * controlling the pagewalk, so should never272272+ * be passed to the callers.273273+ */274274+ err = 0;269275 continue;276276+ }270277 if (err < 0)271278 break;272279 }
+7
mm/rmap.c
···287287 return 0;288288289289 enomem_failure:290290+ /*291291+ * dst->anon_vma is dropped here otherwise its degree can be incorrectly292292+ * decremented in unlink_anon_vmas().293293+ * We can safely do this because callers of anon_vma_clone() don't care294294+ * about dst->anon_vma if anon_vma_clone() failed.295295+ */296296+ dst->anon_vma = NULL;290297 unlink_anon_vmas(dst);291298 return -ENOMEM;292299}
+4-2
mm/slub.c
···24492449 do {24502450 tid = this_cpu_read(s->cpu_slab->tid);24512451 c = raw_cpu_ptr(s->cpu_slab);24522452- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));24522452+ } while (IS_ENABLED(CONFIG_PREEMPT) &&24532453+ unlikely(tid != READ_ONCE(c->tid)));2453245424542455 /*24552456 * Irqless object alloc/free algorithm used here depends on sequence···27192718 do {27202719 tid = this_cpu_read(s->cpu_slab->tid);27212720 c = raw_cpu_ptr(s->cpu_slab);27222722- } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));27212721+ } while (IS_ENABLED(CONFIG_PREEMPT) &&27222722+ unlikely(tid != READ_ONCE(c->tid)));2723272327242724 /* Same with comment on barrier() in slab_alloc_node() */27252725 barrier();
···27732773 } else {27742774 /* Socket is locked, keep trying until memory is available. */27752775 for (;;) {27762776- skb = alloc_skb_fclone(MAX_TCP_HEADER,27772777- sk->sk_allocation);27762776+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);27782777 if (skb)27792778 break;27802779 yield();27812780 }27822782-27832783- /* Reserve space for headers and prepare control bits. */27842784- skb_reserve(skb, MAX_TCP_HEADER);27852781 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */27862782 tcp_init_nondata_skb(skb, tp->write_seq,27872783 TCPHDR_ACK | TCPHDR_FIN);
···112112 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);113113 fptr->nexthdr = nexthdr;114114 fptr->reserved = 0;115115- if (skb_shinfo(skb)->ip6_frag_id)116116- fptr->identification = skb_shinfo(skb)->ip6_frag_id;117117- else118118- ipv6_select_ident(fptr,119119- (struct rt6_info *)skb_dst(skb));115115+ if (!skb_shinfo(skb)->ip6_frag_id)116116+ ipv6_proxy_select_ident(skb);117117+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;120118121119 /* Fragment the skb. ipv6 header and the remaining fields of the122120 * fragment header are updated in ipv6_gso_segment()
···7777 if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])7878 return -EINVAL;79798080+ /* Not all fields are initialized so first zero the tuple */8181+ memset(tuple, 0, sizeof(struct nf_conntrack_tuple));8282+8083 tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));8184 tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);8285
···513513{514514 const struct ip6t_ip6 *i = par->entryinfo;515515516516- if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)517517- && !(i->flags & IP6T_INV_PROTO))516516+ if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&517517+ !(i->invflags & IP6T_INV_PROTO))518518 return 0;519519520520 pr_info("Can be used only in combination with "
+4
net/socket.c
···1702170217031703 if (len > INT_MAX)17041704 len = INT_MAX;17051705+ if (unlikely(!access_ok(VERIFY_READ, buff, len)))17061706+ return -EFAULT;17051707 sock = sockfd_lookup_light(fd, &err, &fput_needed);17061708 if (!sock)17071709 goto out;···1762176017631761 if (size > INT_MAX)17641762 size = INT_MAX;17631763+ if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))17641764+ return -EFAULT;17651765 sock = sockfd_lookup_light(fd, &err, &fput_needed);17661766 if (!sock)17671767 goto out;
+8
tools/testing/selftests/Makefile
···2222TARGETS_HOTPLUG = cpu-hotplug2323TARGETS_HOTPLUG += memory-hotplug24242525+# Clear LDFLAGS and MAKEFLAGS if called from main2626+# Makefile to avoid test build failures when test2727+# Makefile doesn't have explicit build rules.2828+ifeq (1,$(MAKELEVEL))2929+undefine LDFLAGS3030+override MAKEFLAGS =3131+endif3232+2533all:2634 for TARGET in $(TARGETS); do \2735 make -C $$TARGET; \
+7-7
virt/kvm/kvm_main.c
···471471 BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);472472473473 r = -ENOMEM;474474- kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);474474+ kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));475475 if (!kvm->memslots)476476 goto out_err_no_srcu;477477···522522out_err_no_disable:523523 for (i = 0; i < KVM_NR_BUSES; i++)524524 kfree(kvm->buses[i]);525525- kfree(kvm->memslots);525525+ kvfree(kvm->memslots);526526 kvm_arch_free_vm(kvm);527527 return ERR_PTR(r);528528}···578578 kvm_for_each_memslot(memslot, slots)579579 kvm_free_physmem_slot(kvm, memslot, NULL);580580581581- kfree(kvm->memslots);581581+ kvfree(kvm->memslots);582582}583583584584static void kvm_destroy_devices(struct kvm *kvm)···871871 goto out_free;872872 }873873874874- slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),875875- GFP_KERNEL);874874+ slots = kvm_kvzalloc(sizeof(struct kvm_memslots));876875 if (!slots)877876 goto out_free;877877+ memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));878878879879 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {880880 slot = id_to_memslot(slots, mem->slot);···917917 kvm_arch_commit_memory_region(kvm, mem, &old, change);918918919919 kvm_free_physmem_slot(kvm, &old, &new);920920- kfree(old_memslots);920920+ kvfree(old_memslots);921921922922 /*923923 * IOMMU mapping: New slots need to be mapped. Old slots need to be···936936 return 0;937937938938out_slots:939939- kfree(slots);939939+ kvfree(slots);940940out_free:941941 kvm_free_physmem_slot(kvm, &new, &old);942942out: