Merge branch 'perf/timer' into perf/core

+10 -3

MAINTAINERS

··· 1186 1186 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 1187 1187 S: Maintained 1188 1188 F: arch/arm/mach-mvebu/ 1189 - F: drivers/rtc/armada38x-rtc 1189 + F: drivers/rtc/rtc-armada38x.c 1190 1190 1191 1191 ARM/Marvell Berlin SoC support 1192 1192 M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> ··· 1675 1675 F: include/linux/platform_data/at24.h 1676 1676 1677 1677 ATA OVER ETHERNET (AOE) DRIVER 1678 - M: "Ed L. Cashin" <ecashin@coraid.com> 1679 - W: http://support.coraid.com/support/linux 1678 + M: "Ed L. Cashin" <ed.cashin@acm.org> 1679 + W: http://www.openaoe.org/ 1680 1680 S: Supported 1681 1681 F: Documentation/aoe/ 1682 1682 F: drivers/block/aoe/ ··· 3251 3251 S: Maintained 3252 3252 F: Documentation/hwmon/dme1737 3253 3253 F: drivers/hwmon/dme1737.c 3254 + 3255 + DMI/SMBIOS SUPPORT 3256 + M: Jean Delvare <jdelvare@suse.de> 3257 + S: Maintained 3258 + F: drivers/firmware/dmi-id.c 3259 + F: drivers/firmware/dmi_scan.c 3260 + F: include/linux/dmi.h 3254 3261 3255 3262 DOCKING STATION DRIVER 3256 3263 M: Shaohua Li <shaohua.li@intel.com>

+1 -1

arch/arm/plat-omap/counter_32k.c

··· 103 103 104 104 /* 105 105 * 120000 rough estimate from the calculations in 106 - * __clocksource_updatefreq_scale. 106 + * __clocksource_update_freq_scale. 107 107 */ 108 108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 109 109 32768, NSEC_PER_SEC, 120000);

+23 -7

arch/arm64/include/asm/cmpxchg.h

··· 246 246 __ret; \ 247 247 }) 248 248 249 - #define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) 250 - #define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) 251 - #define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) 252 - #define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) 249 + #define _protect_cmpxchg_local(pcp, o, n) \ 250 + ({ \ 251 + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ 252 + preempt_disable(); \ 253 + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ 254 + preempt_enable(); \ 255 + __ret; \ 256 + }) 253 257 254 - #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ 255 - cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ 256 - o1, o2, n1, n2) 258 + #define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) 259 + #define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) 260 + #define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) 261 + #define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) 262 + 263 + #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ 264 + ({ \ 265 + int __ret; \ 266 + preempt_disable(); \ 267 + __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ 268 + raw_cpu_ptr(&(ptr2)), \ 269 + o1, o2, n1, n2); \ 270 + preempt_enable(); \ 271 + __ret; \ 272 + }) 257 273 258 274 #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) 259 275 #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))

+9

arch/arm64/include/asm/mmu_context.h

··· 151 151 { 152 152 unsigned int cpu = smp_processor_id(); 153 153 154 + /* 155 + * init_mm.pgd does not contain any user mappings and it is always 156 + * active for kernel addresses in TTBR1. Just set the reserved TTBR0. 157 + */ 158 + if (next == &init_mm) { 159 + cpu_set_reserved_ttbr0(); 160 + return; 161 + } 162 + 154 163 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) 155 164 check_and_switch_context(next, tsk); 156 165 }

+34 -12

arch/arm64/include/asm/percpu.h

··· 204 204 return ret; 205 205 } 206 206 207 - #define _percpu_add(pcp, val) \ 208 - __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) 207 + #define _percpu_read(pcp) \ 208 + ({ \ 209 + typeof(pcp) __retval; \ 210 + preempt_disable(); \ 211 + __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ 212 + sizeof(pcp)); \ 213 + preempt_enable(); \ 214 + __retval; \ 215 + }) 209 216 210 - #define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) 217 + #define _percpu_write(pcp, val) \ 218 + do { \ 219 + preempt_disable(); \ 220 + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ 221 + sizeof(pcp)); \ 222 + preempt_enable(); \ 223 + } while(0) \ 224 + 225 + #define _pcp_protect(operation, pcp, val) \ 226 + ({ \ 227 + typeof(pcp) __retval; \ 228 + preempt_disable(); \ 229 + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ 230 + (val), sizeof(pcp)); \ 231 + preempt_enable(); \ 232 + __retval; \ 233 + }) 234 + 235 + #define _percpu_add(pcp, val) \ 236 + _pcp_protect(__percpu_add, pcp, val) 237 + 238 + #define _percpu_add_return(pcp, val) _percpu_add(pcp, val) 211 239 212 240 #define _percpu_and(pcp, val) \ 213 - __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) 241 + _pcp_protect(__percpu_and, pcp, val) 214 242 215 243 #define _percpu_or(pcp, val) \ 216 - __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) 217 - 218 - #define _percpu_read(pcp) (typeof(pcp)) \ 219 - (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) 220 - 221 - #define _percpu_write(pcp, val) \ 222 - __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) 244 + _pcp_protect(__percpu_or, pcp, val) 223 245 224 246 #define _percpu_xchg(pcp, val) (typeof(pcp)) \ 225 - (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) 247 + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)) 226 248 227 249 #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) 228 250 #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)

+5 -5

arch/arm64/kernel/vdso.c

··· 200 200 void update_vsyscall(struct timekeeper *tk) 201 201 { 202 202 struct timespec xtime_coarse; 203 - u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); 203 + u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); 204 204 205 205 ++vdso_data->tb_seq_count; 206 206 smp_wmb(); ··· 213 213 vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; 214 214 215 215 if (!use_syscall) { 216 - vdso_data->cs_cycle_last = tk->tkr.cycle_last; 216 + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; 217 217 vdso_data->xtime_clock_sec = tk->xtime_sec; 218 - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 219 - vdso_data->cs_mult = tk->tkr.mult; 220 - vdso_data->cs_shift = tk->tkr.shift; 218 + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; 219 + vdso_data->cs_mult = tk->tkr_mono.mult; 220 + vdso_data->cs_shift = tk->tkr_mono.shift; 221 221 } 222 222 223 223 smp_wmb();

+1

arch/metag/include/asm/io.h

··· 2 2 #define _ASM_METAG_IO_H 3 3 4 4 #include <linux/types.h> 5 + #include <asm/pgtable-bits.h> 5 6 6 7 #define IO_SPACE_LIMIT 0 7 8

+104

arch/metag/include/asm/pgtable-bits.h

··· 1 + /* 2 + * Meta page table definitions. 3 + */ 4 + 5 + #ifndef _METAG_PGTABLE_BITS_H 6 + #define _METAG_PGTABLE_BITS_H 7 + 8 + #include <asm/metag_mem.h> 9 + 10 + /* 11 + * Definitions for MMU descriptors 12 + * 13 + * These are the hardware bits in the MMCU pte entries. 14 + * Derived from the Meta toolkit headers. 15 + */ 16 + #define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT 17 + #define _PAGE_WRITE MMCU_ENTRY_WR_BIT 18 + #define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT 19 + /* Write combine bit - this can cause writes to occur out of order */ 20 + #define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT 21 + /* Sys coherent bit - this bit is never used by Linux */ 22 + #define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT 23 + #define _PAGE_ALWAYS_ZERO_1 0x020 24 + #define _PAGE_CACHE_CTRL0 0x040 25 + #define _PAGE_CACHE_CTRL1 0x080 26 + #define _PAGE_ALWAYS_ZERO_2 0x100 27 + #define _PAGE_ALWAYS_ZERO_3 0x200 28 + #define _PAGE_ALWAYS_ZERO_4 0x400 29 + #define _PAGE_ALWAYS_ZERO_5 0x800 30 + 31 + /* These are software bits that we stuff into the gaps in the hardware 32 + * pte entries that are not used. Note, these DO get stored in the actual 33 + * hardware, but the hardware just does not use them. 34 + */ 35 + #define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 36 + #define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 37 + 38 + /* Pages owned, and protected by, the kernel. */ 39 + #define _PAGE_KERNEL _PAGE_PRIV 40 + 41 + /* No cacheing of this page */ 42 + #define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) 43 + /* burst cacheing - good for data streaming */ 44 + #define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) 45 + /* One cache way per thread */ 46 + #define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) 47 + /* Full on cacheing */ 48 + #define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) 49 + 50 + #define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) 51 + 52 + /* which bits are used for cache control ... 
*/ 53 + #define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ 54 + _PAGE_WR_COMBINE) 55 + 56 + /* This is a mask of the bits that pte_modify is allowed to change. */ 57 + #define _PAGE_CHG_MASK (PAGE_MASK) 58 + 59 + #define _PAGE_SZ_SHIFT 1 60 + #define _PAGE_SZ_4K (0x0) 61 + #define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) 62 + #define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) 63 + #define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) 64 + #define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) 65 + #define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) 66 + #define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) 67 + #define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) 68 + #define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) 69 + #define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) 70 + #define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) 71 + #define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) 72 + 73 + #if defined(CONFIG_PAGE_SIZE_4K) 74 + #define _PAGE_SZ (_PAGE_SZ_4K) 75 + #elif defined(CONFIG_PAGE_SIZE_8K) 76 + #define _PAGE_SZ (_PAGE_SZ_8K) 77 + #elif defined(CONFIG_PAGE_SIZE_16K) 78 + #define _PAGE_SZ (_PAGE_SZ_16K) 79 + #endif 80 + #define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) 81 + 82 + #if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) 83 + # define _PAGE_SZHUGE (_PAGE_SZ_8K) 84 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) 85 + # define _PAGE_SZHUGE (_PAGE_SZ_16K) 86 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) 87 + # define _PAGE_SZHUGE (_PAGE_SZ_32K) 88 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) 89 + # define _PAGE_SZHUGE (_PAGE_SZ_64K) 90 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) 91 + # define _PAGE_SZHUGE (_PAGE_SZ_128K) 92 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) 93 + # define _PAGE_SZHUGE (_PAGE_SZ_256K) 94 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) 95 + # define _PAGE_SZHUGE (_PAGE_SZ_512K) 96 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) 97 + # define _PAGE_SZHUGE (_PAGE_SZ_1M) 98 + #elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) 99 + # define _PAGE_SZHUGE (_PAGE_SZ_2M) 100 + #elif 
defined(CONFIG_HUGETLB_PAGE_SIZE_4M) 101 + # define _PAGE_SZHUGE (_PAGE_SZ_4M) 102 + #endif 103 + 104 + #endif /* _METAG_PGTABLE_BITS_H */

+1 -94

arch/metag/include/asm/pgtable.h

··· 5 5 #ifndef _METAG_PGTABLE_H 6 6 #define _METAG_PGTABLE_H 7 7 8 + #include <asm/pgtable-bits.h> 8 9 #include <asm-generic/pgtable-nopmd.h> 9 10 10 11 /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ ··· 19 18 #define CONSISTENT_END 0x773FFFFF 20 19 #define VMALLOC_START 0x78000000 21 20 #define VMALLOC_END 0x7FFFFFFF 22 - #endif 23 - 24 - /* 25 - * Definitions for MMU descriptors 26 - * 27 - * These are the hardware bits in the MMCU pte entries. 28 - * Derived from the Meta toolkit headers. 29 - */ 30 - #define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT 31 - #define _PAGE_WRITE MMCU_ENTRY_WR_BIT 32 - #define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT 33 - /* Write combine bit - this can cause writes to occur out of order */ 34 - #define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT 35 - /* Sys coherent bit - this bit is never used by Linux */ 36 - #define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT 37 - #define _PAGE_ALWAYS_ZERO_1 0x020 38 - #define _PAGE_CACHE_CTRL0 0x040 39 - #define _PAGE_CACHE_CTRL1 0x080 40 - #define _PAGE_ALWAYS_ZERO_2 0x100 41 - #define _PAGE_ALWAYS_ZERO_3 0x200 42 - #define _PAGE_ALWAYS_ZERO_4 0x400 43 - #define _PAGE_ALWAYS_ZERO_5 0x800 44 - 45 - /* These are software bits that we stuff into the gaps in the hardware 46 - * pte entries that are not used. Note, these DO get stored in the actual 47 - * hardware, but the hardware just does not use them. 48 - */ 49 - #define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 50 - #define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 51 - 52 - /* Pages owned, and protected by, the kernel. 
*/ 53 - #define _PAGE_KERNEL _PAGE_PRIV 54 - 55 - /* No cacheing of this page */ 56 - #define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) 57 - /* burst cacheing - good for data streaming */ 58 - #define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) 59 - /* One cache way per thread */ 60 - #define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) 61 - /* Full on cacheing */ 62 - #define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) 63 - 64 - #define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) 65 - 66 - /* which bits are used for cache control ... */ 67 - #define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ 68 - _PAGE_WR_COMBINE) 69 - 70 - /* This is a mask of the bits that pte_modify is allowed to change. */ 71 - #define _PAGE_CHG_MASK (PAGE_MASK) 72 - 73 - #define _PAGE_SZ_SHIFT 1 74 - #define _PAGE_SZ_4K (0x0) 75 - #define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) 76 - #define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) 77 - #define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) 78 - #define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) 79 - #define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) 80 - #define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) 81 - #define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) 82 - #define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) 83 - #define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) 84 - #define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) 85 - #define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) 86 - 87 - #if defined(CONFIG_PAGE_SIZE_4K) 88 - #define _PAGE_SZ (_PAGE_SZ_4K) 89 - #elif defined(CONFIG_PAGE_SIZE_8K) 90 - #define _PAGE_SZ (_PAGE_SZ_8K) 91 - #elif defined(CONFIG_PAGE_SIZE_16K) 92 - #define _PAGE_SZ (_PAGE_SZ_16K) 93 - #endif 94 - #define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) 95 - 96 - #if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) 97 - # define _PAGE_SZHUGE (_PAGE_SZ_8K) 98 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) 99 - # define _PAGE_SZHUGE (_PAGE_SZ_16K) 100 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) 101 - # define _PAGE_SZHUGE 
(_PAGE_SZ_32K) 102 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) 103 - # define _PAGE_SZHUGE (_PAGE_SZ_64K) 104 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) 105 - # define _PAGE_SZHUGE (_PAGE_SZ_128K) 106 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) 107 - # define _PAGE_SZHUGE (_PAGE_SZ_256K) 108 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) 109 - # define _PAGE_SZHUGE (_PAGE_SZ_512K) 110 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) 111 - # define _PAGE_SZHUGE (_PAGE_SZ_1M) 112 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) 113 - # define _PAGE_SZHUGE (_PAGE_SZ_2M) 114 - #elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) 115 - # define _PAGE_SZHUGE (_PAGE_SZ_4M) 116 21 #endif 117 22 118 23 /*

+3

arch/powerpc/include/asm/ppc-opcode.h

··· 153 153 #define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff 154 154 #define PPC_INST_MFTMR 0x7c0002dc 155 155 #define PPC_INST_MSGSND 0x7c00019c 156 + #define PPC_INST_MSGCLR 0x7c0001dc 156 157 #define PPC_INST_MSGSNDP 0x7c00011c 157 158 #define PPC_INST_MTTMR 0x7c0003dc 158 159 #define PPC_INST_NOP 0x60000000 ··· 309 308 ___PPC_RT(t) | ___PPC_RA(a) | \ 310 309 ___PPC_RB(b) | __PPC_EH(eh)) 311 310 #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ 311 + ___PPC_RB(b)) 312 + #define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ 312 313 ___PPC_RB(b)) 313 314 #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ 314 315 ___PPC_RB(b))

+3

arch/powerpc/include/asm/reg.h

··· 608 608 #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ 609 609 #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ 610 610 #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ 611 + #define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */ 611 612 #define SRR1_WAKESYSERR 0x00300000 /* System error */ 612 613 #define SRR1_WAKEEE 0x00200000 /* External interrupt */ 613 614 #define SRR1_WAKEMT 0x00280000 /* mtctrl */ 614 615 #define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ 615 616 #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ 617 + #define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */ 616 618 #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ 617 619 #define SRR1_WAKERESET 0x00100000 /* System reset */ 620 + #define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ 618 621 #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ 619 622 #define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, 620 623 * may not be recoverable */

+20

arch/powerpc/kernel/cputable.c

··· 437 437 .machine_check_early = __machine_check_early_realmode_p8, 438 438 .platform = "power8", 439 439 }, 440 + { /* Power8NVL */ 441 + .pvr_mask = 0xffff0000, 442 + .pvr_value = 0x004c0000, 443 + .cpu_name = "POWER8NVL (raw)", 444 + .cpu_features = CPU_FTRS_POWER8, 445 + .cpu_user_features = COMMON_USER_POWER8, 446 + .cpu_user_features2 = COMMON_USER2_POWER8, 447 + .mmu_features = MMU_FTRS_POWER8, 448 + .icache_bsize = 128, 449 + .dcache_bsize = 128, 450 + .num_pmcs = 6, 451 + .pmc_type = PPC_PMC_IBM, 452 + .oprofile_cpu_type = "ppc64/power8", 453 + .oprofile_type = PPC_OPROFILE_INVALID, 454 + .cpu_setup = __setup_cpu_power8, 455 + .cpu_restore = __restore_cpu_power8, 456 + .flush_tlb = __flush_tlb_power8, 457 + .machine_check_early = __machine_check_early_realmode_p8, 458 + .platform = "power8", 459 + }, 440 460 { /* Power8 DD1: Does not support doorbell IPIs */ 441 461 .pvr_mask = 0xffffff00, 442 462 .pvr_value = 0x004d0100,

+2

arch/powerpc/kernel/dbell.c

··· 17 17 18 18 #include <asm/dbell.h> 19 19 #include <asm/irq_regs.h> 20 + #include <asm/kvm_ppc.h> 20 21 21 22 #ifdef CONFIG_SMP 22 23 void doorbell_setup_this_cpu(void) ··· 42 41 43 42 may_hard_irq_enable(); 44 43 44 + kvmppc_set_host_ipi(smp_processor_id(), 0); 45 45 __this_cpu_inc(irq_stat.doorbell_irqs); 46 46 47 47 smp_ipi_demux();

+1 -1

arch/powerpc/kernel/exceptions-64s.S

··· 1408 1408 bne 9f /* continue in V mode if we are. */ 1409 1409 1410 1410 5: 1411 - #ifdef CONFIG_KVM_BOOK3S_64_HV 1411 + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 1412 1412 /* 1413 1413 * We are coming from kernel context. Check if we are coming from 1414 1414 * guest. if yes, then we can continue. We will fall through

+12 -2

arch/powerpc/platforms/powernv/smp.c

··· 33 33 #include <asm/runlatch.h> 34 34 #include <asm/code-patching.h> 35 35 #include <asm/dbell.h> 36 + #include <asm/kvm_ppc.h> 37 + #include <asm/ppc-opcode.h> 36 38 37 39 #include "powernv.h" 38 40 ··· 151 149 static void pnv_smp_cpu_kill_self(void) 152 150 { 153 151 unsigned int cpu; 154 - unsigned long srr1; 152 + unsigned long srr1, wmask; 155 153 u32 idle_states; 156 154 157 155 /* Standard hot unplug procedure */ ··· 162 160 DBG("CPU%d offline\n", cpu); 163 161 generic_set_cpu_dead(cpu); 164 162 smp_wmb(); 163 + 164 + wmask = SRR1_WAKEMASK; 165 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) 166 + wmask = SRR1_WAKEMASK_P8; 165 167 166 168 idle_states = pnv_get_supported_cpuidle_states(); 167 169 /* We don't want to take decrementer interrupts while we are offline, ··· 197 191 * having finished executing in a KVM guest, then srr1 198 192 * contains 0. 199 193 */ 200 - if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { 194 + if ((srr1 & wmask) == SRR1_WAKEEE) { 201 195 icp_native_flush_interrupt(); 202 196 local_paca->irq_happened &= PACA_IRQ_HARD_DIS; 203 197 smp_mb(); 198 + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { 199 + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 200 + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); 201 + kvmppc_set_host_ipi(cpu, 0); 204 202 } 205 203 206 204 if (cpu_core_split_required())

+23 -21

arch/powerpc/platforms/pseries/mobility.c

··· 25 25 static struct kobject *mobility_kobj; 26 26 27 27 struct update_props_workarea { 28 - u32 phandle; 29 - u32 state; 30 - u64 reserved; 31 - u32 nprops; 28 + __be32 phandle; 29 + __be32 state; 30 + __be64 reserved; 31 + __be32 nprops; 32 32 } __packed; 33 33 34 34 #define NODE_ACTION_MASK 0xff000000 ··· 54 54 return rc; 55 55 } 56 56 57 - static int delete_dt_node(u32 phandle) 57 + static int delete_dt_node(__be32 phandle) 58 58 { 59 59 struct device_node *dn; 60 60 61 - dn = of_find_node_by_phandle(phandle); 61 + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); 62 62 if (!dn) 63 63 return -ENOENT; 64 64 ··· 127 127 return 0; 128 128 } 129 129 130 - static int update_dt_node(u32 phandle, s32 scope) 130 + static int update_dt_node(__be32 phandle, s32 scope) 131 131 { 132 132 struct update_props_workarea *upwa; 133 133 struct device_node *dn; ··· 136 136 char *prop_data; 137 137 char *rtas_buf; 138 138 int update_properties_token; 139 + u32 nprops; 139 140 u32 vd; 140 141 141 142 update_properties_token = rtas_token("ibm,update-properties"); ··· 147 146 if (!rtas_buf) 148 147 return -ENOMEM; 149 148 150 - dn = of_find_node_by_phandle(phandle); 149 + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); 151 150 if (!dn) { 152 151 kfree(rtas_buf); 153 152 return -ENOENT; ··· 163 162 break; 164 163 165 164 prop_data = rtas_buf + sizeof(*upwa); 165 + nprops = be32_to_cpu(upwa->nprops); 166 166 167 167 /* On the first call to ibm,update-properties for a node the 168 168 * the first property value descriptor contains an empty ··· 172 170 */ 173 171 if (*prop_data == 0) { 174 172 prop_data++; 175 - vd = *(u32 *)prop_data; 173 + vd = be32_to_cpu(*(__be32 *)prop_data); 176 174 prop_data += vd + sizeof(vd); 177 - upwa->nprops--; 175 + nprops--; 178 176 } 179 177 180 - for (i = 0; i < upwa->nprops; i++) { 178 + for (i = 0; i < nprops; i++) { 181 179 char *prop_name; 182 180 183 181 prop_name = prop_data; 184 182 prop_data += strlen(prop_name) + 1; 185 - vd = *(u32 
*)prop_data; 183 + vd = be32_to_cpu(*(__be32 *)prop_data); 186 184 prop_data += sizeof(vd); 187 185 188 186 switch (vd) { ··· 214 212 return 0; 215 213 } 216 214 217 - static int add_dt_node(u32 parent_phandle, u32 drc_index) 215 + static int add_dt_node(__be32 parent_phandle, __be32 drc_index) 218 216 { 219 217 struct device_node *dn; 220 218 struct device_node *parent_dn; 221 219 int rc; 222 220 223 - parent_dn = of_find_node_by_phandle(parent_phandle); 221 + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); 224 222 if (!parent_dn) 225 223 return -ENOENT; 226 224 ··· 239 237 int pseries_devicetree_update(s32 scope) 240 238 { 241 239 char *rtas_buf; 242 - u32 *data; 240 + __be32 *data; 243 241 int update_nodes_token; 244 242 int rc; 245 243 ··· 256 254 if (rc && rc != 1) 257 255 break; 258 256 259 - data = (u32 *)rtas_buf + 4; 260 - while (*data & NODE_ACTION_MASK) { 257 + data = (__be32 *)rtas_buf + 4; 258 + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { 261 259 int i; 262 - u32 action = *data & NODE_ACTION_MASK; 263 - int node_count = *data & NODE_COUNT_MASK; 260 + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; 261 + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; 264 262 265 263 data++; 266 264 267 265 for (i = 0; i < node_count; i++) { 268 - u32 phandle = *data++; 269 - u32 drc_index; 266 + __be32 phandle = *data++; 267 + __be32 drc_index; 270 268 271 269 switch (action) { 272 270 case DELETE_DT_NODE:

+1 -1

arch/s390/include/asm/elf.h

··· 211 211 212 212 extern unsigned long mmap_rnd_mask; 213 213 214 - #define STACK_RND_MASK (mmap_rnd_mask) 214 + #define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) 215 215 216 216 #define ARCH_DLINFO \ 217 217 do { \

+45 -16

arch/s390/kernel/ftrace.c

··· 57 57 58 58 unsigned long ftrace_plt; 59 59 60 + static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) 61 + { 62 + #ifdef CC_USING_HOTPATCH 63 + /* brcl 0,0 */ 64 + insn->opc = 0xc004; 65 + insn->disp = 0; 66 + #else 67 + /* stg r14,8(r15) */ 68 + insn->opc = 0xe3e0; 69 + insn->disp = 0xf0080024; 70 + #endif 71 + } 72 + 73 + static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) 74 + { 75 + #ifdef CONFIG_KPROBES 76 + if (insn->opc == BREAKPOINT_INSTRUCTION) 77 + return 1; 78 + #endif 79 + return 0; 80 + } 81 + 82 + static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) 83 + { 84 + #ifdef CONFIG_KPROBES 85 + insn->opc = BREAKPOINT_INSTRUCTION; 86 + insn->disp = KPROBE_ON_FTRACE_NOP; 87 + #endif 88 + } 89 + 90 + static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) 91 + { 92 + #ifdef CONFIG_KPROBES 93 + insn->opc = BREAKPOINT_INSTRUCTION; 94 + insn->disp = KPROBE_ON_FTRACE_CALL; 95 + #endif 96 + } 97 + 60 98 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, 61 99 unsigned long addr) 62 100 { ··· 110 72 return -EFAULT; 111 73 if (addr == MCOUNT_ADDR) { 112 74 /* Initial code replacement */ 113 - #ifdef CC_USING_HOTPATCH 114 - /* We expect to see brcl 0,0 */ 115 - ftrace_generate_nop_insn(&orig); 116 - #else 117 - /* We expect to see stg r14,8(r15) */ 118 - orig.opc = 0xe3e0; 119 - orig.disp = 0xf0080024; 120 - #endif 75 + ftrace_generate_orig_insn(&orig); 121 76 ftrace_generate_nop_insn(&new); 122 - } else if (old.opc == BREAKPOINT_INSTRUCTION) { 77 + } else if (is_kprobe_on_ftrace(&old)) { 123 78 /* 124 79 * If we find a breakpoint instruction, a kprobe has been 125 80 * placed at the beginning of the function. We write the ··· 120 89 * bytes of the original instruction so that the kprobes 121 90 * handler can execute a nop, if it reaches this breakpoint. 
122 91 */ 123 - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; 124 - orig.disp = KPROBE_ON_FTRACE_CALL; 125 - new.disp = KPROBE_ON_FTRACE_NOP; 92 + ftrace_generate_kprobe_call_insn(&orig); 93 + ftrace_generate_kprobe_nop_insn(&new); 126 94 } else { 127 95 /* Replace ftrace call with a nop. */ 128 96 ftrace_generate_call_insn(&orig, rec->ip); ··· 141 111 142 112 if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) 143 113 return -EFAULT; 144 - if (old.opc == BREAKPOINT_INSTRUCTION) { 114 + if (is_kprobe_on_ftrace(&old)) { 145 115 /* 146 116 * If we find a breakpoint instruction, a kprobe has been 147 117 * placed at the beginning of the function. We write the ··· 149 119 * bytes of the original instruction so that the kprobes 150 120 * handler can execute a brasl if it reaches this breakpoint. 151 121 */ 152 - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; 153 - orig.disp = KPROBE_ON_FTRACE_NOP; 154 - new.disp = KPROBE_ON_FTRACE_CALL; 122 + ftrace_generate_kprobe_nop_insn(&orig); 123 + ftrace_generate_kprobe_call_insn(&new); 155 124 } else { 156 125 /* Replace nop with an ftrace call. */ 157 126 ftrace_generate_nop_insn(&orig);

+5 -2

arch/s390/kernel/perf_cpum_sf.c

··· 1415 1415 1416 1416 static struct attribute *cpumsf_pmu_events_attr[] = { 1417 1417 CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), 1418 - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), 1418 + NULL, 1419 1419 NULL, 1420 1420 }; 1421 1421 ··· 1606 1606 return -EINVAL; 1607 1607 } 1608 1608 1609 - if (si.ad) 1609 + if (si.ad) { 1610 1610 sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); 1611 + cpumsf_pmu_events_attr[1] = 1612 + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); 1613 + } 1611 1614 1612 1615 sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); 1613 1616 if (!sfdbg)

+11

arch/s390/kernel/swsusp_asm64.S

··· 177 177 lhi %r1,1 178 178 sigp %r1,%r0,SIGP_SET_ARCHITECTURE 179 179 sam64 180 + #ifdef CONFIG_SMP 181 + larl %r1,smp_cpu_mt_shift 182 + icm %r1,15,0(%r1) 183 + jz smt_done 184 + llgfr %r1,%r1 185 + smt_loop: 186 + sigp %r1,%r0,SIGP_SET_MULTI_THREADING 187 + brc 8,smt_done /* accepted */ 188 + brc 2,smt_loop /* busy, try again */ 189 + smt_done: 190 + #endif 180 191 larl %r1,.Lnew_pgm_check_psw 181 192 lpswe 0(%r1) 182 193 pgm_check_entry:

+10 -10

arch/s390/kernel/time.c

··· 215 215 { 216 216 u64 nsecps; 217 217 218 - if (tk->tkr.clock != &clocksource_tod) 218 + if (tk->tkr_mono.clock != &clocksource_tod) 219 219 return; 220 220 221 221 /* Make userspace gettimeofday spin until we're done. */ 222 222 ++vdso_data->tb_update_count; 223 223 smp_wmb(); 224 - vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; 224 + vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last; 225 225 vdso_data->xtime_clock_sec = tk->xtime_sec; 226 - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 226 + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; 227 227 vdso_data->wtom_clock_sec = 228 228 tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 229 - vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + 230 - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); 231 - nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; 229 + vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec + 230 + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); 231 + nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift; 232 232 while (vdso_data->wtom_clock_nsec >= nsecps) { 233 233 vdso_data->wtom_clock_nsec -= nsecps; 234 234 vdso_data->wtom_clock_sec++; ··· 236 236 237 237 vdso_data->xtime_coarse_sec = tk->xtime_sec; 238 238 vdso_data->xtime_coarse_nsec = 239 - (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 239 + (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 240 240 vdso_data->wtom_coarse_sec = 241 241 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; 242 242 vdso_data->wtom_coarse_nsec = ··· 246 246 vdso_data->wtom_coarse_sec++; 247 247 } 248 248 249 - vdso_data->tk_mult = tk->tkr.mult; 250 - vdso_data->tk_shift = tk->tkr.shift; 249 + vdso_data->tk_mult = tk->tkr_mono.mult; 250 + vdso_data->tk_shift = tk->tkr_mono.shift; 251 251 smp_wmb(); 252 252 ++vdso_data->tb_update_count; 253 253 } ··· 283 283 if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) 284 284 panic("Couldn't request external interrupt 0x1406"); 285 285 286 - if 
(clocksource_register(&clocksource_tod) != 0) 286 + if (__clocksource_register(&clocksource_tod) != 0) 287 287 panic("Could not register TOD clock source"); 288 288 289 289 /* Enable TOD clock interrupts on the boot cpu. */

+12

arch/sparc/include/asm/hypervisor.h

··· 2957 2957 unsigned long reg_val); 2958 2958 #endif 2959 2959 2960 + 2961 + #define HV_FAST_M7_GET_PERFREG 0x43 2962 + #define HV_FAST_M7_SET_PERFREG 0x44 2963 + 2964 + #ifndef __ASSEMBLY__ 2965 + unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, 2966 + unsigned long *reg_val); 2967 + unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, 2968 + unsigned long reg_val); 2969 + #endif 2970 + 2960 2971 /* Function numbers for HV_CORE_TRAP. */ 2961 2972 #define HV_CORE_SET_VER 0x00 2962 2973 #define HV_CORE_PUTCHAR 0x01 ··· 2992 2981 #define HV_GRP_SDIO 0x0108 2993 2982 #define HV_GRP_SDIO_ERR 0x0109 2994 2983 #define HV_GRP_REBOOT_DATA 0x0110 2984 + #define HV_GRP_M7_PERF 0x0114 2995 2985 #define HV_GRP_NIAG_PERF 0x0200 2996 2986 #define HV_GRP_FIRE_PERF 0x0201 2997 2987 #define HV_GRP_N2_CPU 0x0202

+1

arch/sparc/kernel/hvapi.c

··· 48 48 { .group = HV_GRP_VT_CPU, }, 49 49 { .group = HV_GRP_T5_CPU, }, 50 50 { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, 51 + { .group = HV_GRP_M7_PERF, }, 51 52 }; 52 53 53 54 static DEFINE_SPINLOCK(hvapi_lock);

+16

arch/sparc/kernel/hvcalls.S

··· 837 837 retl 838 838 nop 839 839 ENDPROC(sun4v_t5_set_perfreg) 840 + 841 + ENTRY(sun4v_m7_get_perfreg) 842 + mov %o1, %o4 843 + mov HV_FAST_M7_GET_PERFREG, %o5 844 + ta HV_FAST_TRAP 845 + stx %o1, [%o4] 846 + retl 847 + nop 848 + ENDPROC(sun4v_m7_get_perfreg) 849 + 850 + ENTRY(sun4v_m7_set_perfreg) 851 + mov HV_FAST_M7_SET_PERFREG, %o5 852 + ta HV_FAST_TRAP 853 + retl 854 + nop 855 + ENDPROC(sun4v_m7_set_perfreg)

+33

arch/sparc/kernel/pcr.c

··· 217 217 .pcr_nmi_disable = PCR_N4_PICNPT, 218 218 }; 219 219 220 + static u64 m7_pcr_read(unsigned long reg_num) 221 + { 222 + unsigned long val; 223 + 224 + (void) sun4v_m7_get_perfreg(reg_num, &val); 225 + 226 + return val; 227 + } 228 + 229 + static void m7_pcr_write(unsigned long reg_num, u64 val) 230 + { 231 + (void) sun4v_m7_set_perfreg(reg_num, val); 232 + } 233 + 234 + static const struct pcr_ops m7_pcr_ops = { 235 + .read_pcr = m7_pcr_read, 236 + .write_pcr = m7_pcr_write, 237 + .read_pic = n4_pic_read, 238 + .write_pic = n4_pic_write, 239 + .nmi_picl_value = n4_picl_value, 240 + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | 241 + PCR_N4_UTRACE | PCR_N4_TOE | 242 + (26 << PCR_N4_SL_SHIFT)), 243 + .pcr_nmi_disable = PCR_N4_PICNPT, 244 + }; 220 245 221 246 static unsigned long perf_hsvc_group; 222 247 static unsigned long perf_hsvc_major; ··· 271 246 272 247 case SUN4V_CHIP_NIAGARA5: 273 248 perf_hsvc_group = HV_GRP_T5_CPU; 249 + break; 250 + 251 + case SUN4V_CHIP_SPARC_M7: 252 + perf_hsvc_group = HV_GRP_M7_PERF; 274 253 break; 275 254 276 255 default: ··· 320 291 321 292 case SUN4V_CHIP_NIAGARA5: 322 293 pcr_ops = &n5_pcr_ops; 294 + break; 295 + 296 + case SUN4V_CHIP_SPARC_M7: 297 + pcr_ops = &m7_pcr_ops; 323 298 break; 324 299 325 300 default:

+43 -12

arch/sparc/kernel/perf_event.c

··· 792 792 .num_pic_regs = 4, 793 793 }; 794 794 795 + static void sparc_m7_write_pmc(int idx, u64 val) 796 + { 797 + u64 pcr; 798 + 799 + pcr = pcr_ops->read_pcr(idx); 800 + /* ensure ov and ntc are reset */ 801 + pcr &= ~(PCR_N4_OV | PCR_N4_NTC); 802 + 803 + pcr_ops->write_pic(idx, val & 0xffffffff); 804 + 805 + pcr_ops->write_pcr(idx, pcr); 806 + } 807 + 808 + static const struct sparc_pmu sparc_m7_pmu = { 809 + .event_map = niagara4_event_map, 810 + .cache_map = &niagara4_cache_map, 811 + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), 812 + .read_pmc = sparc_vt_read_pmc, 813 + .write_pmc = sparc_m7_write_pmc, 814 + .upper_shift = 5, 815 + .lower_shift = 5, 816 + .event_mask = 0x7ff, 817 + .user_bit = PCR_N4_UTRACE, 818 + .priv_bit = PCR_N4_STRACE, 819 + 820 + /* We explicitly don't support hypervisor tracing. */ 821 + .hv_bit = 0, 822 + 823 + .irq_bit = PCR_N4_TOE, 824 + .upper_nop = 0, 825 + .lower_nop = 0, 826 + .flags = 0, 827 + .max_hw_events = 4, 828 + .num_pcrs = 4, 829 + .num_pic_regs = 4, 830 + }; 795 831 static const struct sparc_pmu *sparc_pmu __read_mostly; 796 832 797 833 static u64 event_encoding(u64 event_id, int idx) ··· 996 960 cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; 997 961 } 998 962 963 + static void sparc_pmu_start(struct perf_event *event, int flags); 964 + 999 965 /* On this PMU each PIC has it's own PCR control register. 
*/ 1000 966 static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) 1001 967 { ··· 1010 972 struct perf_event *cp = cpuc->event[i]; 1011 973 struct hw_perf_event *hwc = &cp->hw; 1012 974 int idx = hwc->idx; 1013 - u64 enc; 1014 975 1015 976 if (cpuc->current_idx[i] != PIC_NO_INDEX) 1016 977 continue; 1017 978 1018 - sparc_perf_event_set_period(cp, hwc, idx); 1019 979 cpuc->current_idx[i] = idx; 1020 980 1021 - enc = perf_event_get_enc(cpuc->events[i]); 1022 - cpuc->pcr[idx] &= ~mask_for_index(idx); 1023 - if (hwc->state & PERF_HES_STOPPED) 1024 - cpuc->pcr[idx] |= nop_for_index(idx); 1025 - else 1026 - cpuc->pcr[idx] |= event_encoding(enc, idx); 981 + sparc_pmu_start(cp, PERF_EF_RELOAD); 1027 982 } 1028 983 out: 1029 984 for (i = 0; i < cpuc->n_events; i++) { ··· 1132 1101 int i; 1133 1102 1134 1103 local_irq_save(flags); 1135 - perf_pmu_disable(event->pmu); 1136 1104 1137 1105 for (i = 0; i < cpuc->n_events; i++) { 1138 1106 if (event == cpuc->event[i]) { ··· 1157 1127 } 1158 1128 } 1159 1129 1160 - perf_pmu_enable(event->pmu); 1161 1130 local_irq_restore(flags); 1162 1131 } 1163 1132 ··· 1390 1361 unsigned long flags; 1391 1362 1392 1363 local_irq_save(flags); 1393 - perf_pmu_disable(event->pmu); 1394 1364 1395 1365 n0 = cpuc->n_events; 1396 1366 if (n0 >= sparc_pmu->max_hw_events) ··· 1422 1394 1423 1395 ret = 0; 1424 1396 out: 1425 - perf_pmu_enable(event->pmu); 1426 1397 local_irq_restore(flags); 1427 1398 return ret; 1428 1399 } ··· 1692 1665 if (!strcmp(sparc_pmu_type, "niagara4") || 1693 1666 !strcmp(sparc_pmu_type, "niagara5")) { 1694 1667 sparc_pmu = &niagara4_pmu; 1668 + return true; 1669 + } 1670 + if (!strcmp(sparc_pmu_type, "sparc-m7")) { 1671 + sparc_pmu = &sparc_m7_pmu; 1695 1672 return true; 1696 1673 } 1697 1674 return false;

+4

arch/sparc/kernel/process_64.c

··· 287 287 printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", 288 288 gp->tpc, gp->o7, gp->i7, gp->rpc); 289 289 } 290 + 291 + touch_nmi_watchdog(); 290 292 } 291 293 292 294 memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); ··· 364 362 (cpu == this_cpu ? '*' : ' '), cpu, 365 363 pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3], 366 364 pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]); 365 + 366 + touch_nmi_watchdog(); 367 367 } 368 368 369 369 memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));

+1 -5

arch/sparc/kernel/time_32.c

··· 181 181 .rating = 100, 182 182 .read = timer_cs_read, 183 183 .mask = CLOCKSOURCE_MASK(64), 184 - .shift = 2, 185 184 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 186 185 }; 187 186 188 187 static __init int setup_timer_cs(void) 189 188 { 190 189 timer_cs_enabled = 1; 191 - timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, 192 - timer_cs.shift); 193 - 194 - return clocksource_register(&timer_cs); 190 + return clocksource_register_hz(&timer_cs, sparc_config.clock_rate); 195 191 } 196 192 197 193 #ifdef CONFIG_SMP

+32 -3

arch/sparc/lib/memmove.S

··· 8 8 9 9 .text 10 10 ENTRY(memmove) /* o0=dst o1=src o2=len */ 11 - mov %o0, %g1 11 + brz,pn %o2, 99f 12 + mov %o0, %g1 13 + 12 14 cmp %o0, %o1 13 - bleu,pt %xcc, memcpy 15 + bleu,pt %xcc, 2f 14 16 add %o1, %o2, %g7 15 17 cmp %g7, %o0 16 18 bleu,pt %xcc, memcpy ··· 26 24 stb %g7, [%o0] 27 25 bne,pt %icc, 1b 28 26 sub %o0, 1, %o0 29 - 27 + 99: 30 28 retl 31 29 mov %g1, %o0 30 + 31 + /* We can't just call memcpy for these memmove cases. On some 32 + * chips the memcpy uses cache initializing stores and when dst 33 + * and src are close enough, those can clobber the source data 34 + * before we've loaded it in. 35 + */ 36 + 2: or %o0, %o1, %g7 37 + or %o2, %g7, %g7 38 + andcc %g7, 0x7, %g0 39 + bne,pn %xcc, 4f 40 + nop 41 + 42 + 3: ldx [%o1], %g7 43 + add %o1, 8, %o1 44 + subcc %o2, 8, %o2 45 + add %o0, 8, %o0 46 + bne,pt %icc, 3b 47 + stx %g7, [%o0 - 0x8] 48 + ba,a,pt %xcc, 99b 49 + 50 + 4: ldub [%o1], %g7 51 + add %o1, 1, %o1 52 + subcc %o2, 1, %o2 53 + add %o0, 1, %o0 54 + bne,pt %icc, 4b 55 + stb %g7, [%o0 - 0x1] 56 + ba,a,pt %xcc, 99b 32 57 ENDPROC(memmove)

+12 -12

arch/tile/kernel/time.c

··· 257 257 258 258 void update_vsyscall(struct timekeeper *tk) 259 259 { 260 - if (tk->tkr.clock != &cycle_counter_cs) 260 + if (tk->tkr_mono.clock != &cycle_counter_cs) 261 261 return; 262 262 263 263 write_seqcount_begin(&vdso_data->tb_seq); 264 264 265 - vdso_data->cycle_last = tk->tkr.cycle_last; 266 - vdso_data->mask = tk->tkr.mask; 267 - vdso_data->mult = tk->tkr.mult; 268 - vdso_data->shift = tk->tkr.shift; 265 + vdso_data->cycle_last = tk->tkr_mono.cycle_last; 266 + vdso_data->mask = tk->tkr_mono.mask; 267 + vdso_data->mult = tk->tkr_mono.mult; 268 + vdso_data->shift = tk->tkr_mono.shift; 269 269 270 270 vdso_data->wall_time_sec = tk->xtime_sec; 271 - vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; 271 + vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec; 272 272 273 273 vdso_data->monotonic_time_sec = tk->xtime_sec 274 274 + tk->wall_to_monotonic.tv_sec; 275 - vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec 275 + vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec 276 276 + ((u64)tk->wall_to_monotonic.tv_nsec 277 - << tk->tkr.shift); 277 + << tk->tkr_mono.shift); 278 278 while (vdso_data->monotonic_time_snsec >= 279 - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 279 + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { 280 280 vdso_data->monotonic_time_snsec -= 281 - ((u64)NSEC_PER_SEC) << tk->tkr.shift; 281 + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; 282 282 vdso_data->monotonic_time_sec++; 283 283 } 284 284 285 285 vdso_data->wall_time_coarse_sec = tk->xtime_sec; 286 - vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 287 - tk->tkr.shift); 286 + vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> 287 + tk->tkr_mono.shift); 288 288 289 289 vdso_data->monotonic_time_coarse_sec = 290 290 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;

+12 -2

arch/x86/kernel/cpu/perf_event.c

··· 1978 1978 1979 1979 data = cyc2ns_read_begin(); 1980 1980 1981 + /* 1982 + * Internal timekeeping for enabled/running/stopped times 1983 + * is always in the local_clock domain. 1984 + */ 1981 1985 userpg->cap_user_time = 1; 1982 1986 userpg->time_mult = data->cyc2ns_mul; 1983 1987 userpg->time_shift = data->cyc2ns_shift; 1984 1988 userpg->time_offset = data->cyc2ns_offset - now; 1985 1989 1986 - userpg->cap_user_time_zero = 1; 1987 - userpg->time_zero = data->cyc2ns_offset; 1990 + /* 1991 + * cap_user_time_zero doesn't make sense when we're using a different 1992 + * time base for the records. 1993 + */ 1994 + if (event->clock == &local_clock) { 1995 + userpg->cap_user_time_zero = 1; 1996 + userpg->time_zero = data->cyc2ns_offset; 1997 + } 1988 1998 1989 1999 cyc2ns_read_end(data); 1990 2000 }

+12 -12

arch/x86/kernel/vsyscall_gtod.c

··· 31 31 gtod_write_begin(vdata); 32 32 33 33 /* copy vsyscall data */ 34 - vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; 35 - vdata->cycle_last = tk->tkr.cycle_last; 36 - vdata->mask = tk->tkr.mask; 37 - vdata->mult = tk->tkr.mult; 38 - vdata->shift = tk->tkr.shift; 34 + vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; 35 + vdata->cycle_last = tk->tkr_mono.cycle_last; 36 + vdata->mask = tk->tkr_mono.mask; 37 + vdata->mult = tk->tkr_mono.mult; 38 + vdata->shift = tk->tkr_mono.shift; 39 39 40 40 vdata->wall_time_sec = tk->xtime_sec; 41 - vdata->wall_time_snsec = tk->tkr.xtime_nsec; 41 + vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; 42 42 43 43 vdata->monotonic_time_sec = tk->xtime_sec 44 44 + tk->wall_to_monotonic.tv_sec; 45 - vdata->monotonic_time_snsec = tk->tkr.xtime_nsec 45 + vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec 46 46 + ((u64)tk->wall_to_monotonic.tv_nsec 47 - << tk->tkr.shift); 47 + << tk->tkr_mono.shift); 48 48 while (vdata->monotonic_time_snsec >= 49 - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 49 + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { 50 50 vdata->monotonic_time_snsec -= 51 - ((u64)NSEC_PER_SEC) << tk->tkr.shift; 51 + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; 52 52 vdata->monotonic_time_sec++; 53 53 } 54 54 55 55 vdata->wall_time_coarse_sec = tk->xtime_sec; 56 - vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 57 - tk->tkr.shift); 56 + vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> 57 + tk->tkr_mono.shift); 58 58 59 59 vdata->monotonic_time_coarse_sec = 60 60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;

+3 -1

arch/x86/kvm/ioapic.c

··· 422 422 struct kvm_ioapic *ioapic, int vector, int trigger_mode) 423 423 { 424 424 int i; 425 + struct kvm_lapic *apic = vcpu->arch.apic; 425 426 426 427 for (i = 0; i < IOAPIC_NUM_PINS; i++) { 427 428 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; ··· 444 443 kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); 445 444 spin_lock(&ioapic->lock); 446 445 447 - if (trigger_mode != IOAPIC_LEVEL_TRIG) 446 + if (trigger_mode != IOAPIC_LEVEL_TRIG || 447 + kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) 448 448 continue; 449 449 450 450 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);

+1 -2

arch/x86/kvm/lapic.c

··· 833 833 834 834 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 835 835 { 836 - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && 837 - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { 836 + if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { 838 837 int trigger_mode; 839 838 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 840 839 trigger_mode = IOAPIC_LEVEL_TRIG;

+5 -2

arch/x86/kvm/vmx.c

··· 2479 2479 if (enable_ept) { 2480 2480 /* nested EPT: emulate EPT also to L1 */ 2481 2481 vmx->nested.nested_vmx_secondary_ctls_high |= 2482 - SECONDARY_EXEC_ENABLE_EPT | 2483 - SECONDARY_EXEC_UNRESTRICTED_GUEST; 2482 + SECONDARY_EXEC_ENABLE_EPT; 2484 2483 vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2485 2484 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | 2486 2485 VMX_EPT_INVEPT_BIT; ··· 2492 2493 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; 2493 2494 } else 2494 2495 vmx->nested.nested_vmx_ept_caps = 0; 2496 + 2497 + if (enable_unrestricted_guest) 2498 + vmx->nested.nested_vmx_secondary_ctls_high |= 2499 + SECONDARY_EXEC_UNRESTRICTED_GUEST; 2495 2500 2496 2501 /* miscellaneous data */ 2497 2502 rdmsr(MSR_IA32_VMX_MISC,

+7 -7

arch/x86/kvm/x86.c

··· 1070 1070 struct pvclock_gtod_data *vdata = &pvclock_gtod_data; 1071 1071 u64 boot_ns; 1072 1072 1073 - boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); 1073 + boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); 1074 1074 1075 1075 write_seqcount_begin(&vdata->seq); 1076 1076 1077 1077 /* copy pvclock gtod data */ 1078 - vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; 1079 - vdata->clock.cycle_last = tk->tkr.cycle_last; 1080 - vdata->clock.mask = tk->tkr.mask; 1081 - vdata->clock.mult = tk->tkr.mult; 1082 - vdata->clock.shift = tk->tkr.shift; 1078 + vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; 1079 + vdata->clock.cycle_last = tk->tkr_mono.cycle_last; 1080 + vdata->clock.mask = tk->tkr_mono.mask; 1081 + vdata->clock.mult = tk->tkr_mono.mult; 1082 + vdata->clock.shift = tk->tkr_mono.shift; 1083 1083 1084 1084 vdata->boot_ns = boot_ns; 1085 - vdata->nsec_base = tk->tkr.xtime_nsec; 1085 + vdata->nsec_base = tk->tkr_mono.xtime_nsec; 1086 1086 1087 1087 write_seqcount_end(&vdata->seq); 1088 1088 }

+1 -1

block/blk-merge.c

··· 592 592 if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { 593 593 struct bio_vec *bprev; 594 594 595 - bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; 595 + bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; 596 596 if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) 597 597 return false; 598 598 }

+4 -2

block/blk-mq-tag.c

··· 278 278 /* 279 279 * We're out of tags on this hardware queue, kick any 280 280 * pending IO submits before going to sleep waiting for 281 - * some to complete. 281 + * some to complete. Note that hctx can be NULL here for 282 + * reserved tag allocation. 282 283 */ 283 - blk_mq_run_hw_queue(hctx, false); 284 + if (hctx) 285 + blk_mq_run_hw_queue(hctx, false); 284 286 285 287 /* 286 288 * Retry tag allocation after running the hardware queue,

+3 -3

block/blk-mq.c

··· 1938 1938 */ 1939 1939 if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, 1940 1940 PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) 1941 - goto err_map; 1941 + goto err_mq_usage; 1942 1942 1943 1943 setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); 1944 1944 blk_queue_rq_timeout(q, 30000); ··· 1981 1981 blk_mq_init_cpu_queues(q, set->nr_hw_queues); 1982 1982 1983 1983 if (blk_mq_init_hw_queues(q, set)) 1984 - goto err_hw; 1984 + goto err_mq_usage; 1985 1985 1986 1986 mutex_lock(&all_q_mutex); 1987 1987 list_add_tail(&q->all_q_node, &all_q_list); ··· 1993 1993 1994 1994 return q; 1995 1995 1996 - err_hw: 1996 + err_mq_usage: 1997 1997 blk_cleanup_queue(q); 1998 1998 err_hctxs: 1999 1999 kfree(map);

+2 -2

drivers/ata/libata-core.c

··· 4737 4737 return NULL; 4738 4738 4739 4739 /* libsas case */ 4740 - if (!ap->scsi_host) { 4740 + if (ap->flags & ATA_FLAG_SAS_HOST) { 4741 4741 tag = ata_sas_allocate_tag(ap); 4742 4742 if (tag < 0) 4743 4743 return NULL; ··· 4776 4776 tag = qc->tag; 4777 4777 if (likely(ata_tag_valid(tag))) { 4778 4778 qc->tag = ATA_TAG_POISON; 4779 - if (!ap->scsi_host) 4779 + if (ap->flags & ATA_FLAG_SAS_HOST) 4780 4780 ata_sas_free_tag(tag, ap); 4781 4781 } 4782 4782 }

+8

drivers/base/regmap/internal.h

··· 243 243 extern struct regcache_ops regcache_lzo_ops; 244 244 extern struct regcache_ops regcache_flat_ops; 245 245 246 + static inline const char *regmap_name(const struct regmap *map) 247 + { 248 + if (map->dev) 249 + return dev_name(map->dev); 250 + 251 + return map->name; 252 + } 253 + 246 254 #endif

+8 -8

drivers/base/regmap/regcache.c

··· 218 218 ret = map->cache_ops->read(map, reg, value); 219 219 220 220 if (ret == 0) 221 - trace_regmap_reg_read_cache(map->dev, reg, *value); 221 + trace_regmap_reg_read_cache(map, reg, *value); 222 222 223 223 return ret; 224 224 } ··· 311 311 dev_dbg(map->dev, "Syncing %s cache\n", 312 312 map->cache_ops->name); 313 313 name = map->cache_ops->name; 314 - trace_regcache_sync(map->dev, name, "start"); 314 + trace_regcache_sync(map, name, "start"); 315 315 316 316 if (!map->cache_dirty) 317 317 goto out; ··· 346 346 347 347 regmap_async_complete(map); 348 348 349 - trace_regcache_sync(map->dev, name, "stop"); 349 + trace_regcache_sync(map, name, "stop"); 350 350 351 351 return ret; 352 352 } ··· 381 381 name = map->cache_ops->name; 382 382 dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max); 383 383 384 - trace_regcache_sync(map->dev, name, "start region"); 384 + trace_regcache_sync(map, name, "start region"); 385 385 386 386 if (!map->cache_dirty) 387 387 goto out; ··· 401 401 402 402 regmap_async_complete(map); 403 403 404 - trace_regcache_sync(map->dev, name, "stop region"); 404 + trace_regcache_sync(map, name, "stop region"); 405 405 406 406 return ret; 407 407 } ··· 428 428 429 429 map->lock(map->lock_arg); 430 430 431 - trace_regcache_drop_region(map->dev, min, max); 431 + trace_regcache_drop_region(map, min, max); 432 432 433 433 ret = map->cache_ops->drop(map, min, max); 434 434 ··· 455 455 map->lock(map->lock_arg); 456 456 WARN_ON(map->cache_bypass && enable); 457 457 map->cache_only = enable; 458 - trace_regmap_cache_only(map->dev, enable); 458 + trace_regmap_cache_only(map, enable); 459 459 map->unlock(map->lock_arg); 460 460 } 461 461 EXPORT_SYMBOL_GPL(regcache_cache_only); ··· 493 493 map->lock(map->lock_arg); 494 494 WARN_ON(map->cache_only && enable); 495 495 map->cache_bypass = enable; 496 - trace_regmap_cache_bypass(map->dev, enable); 496 + trace_regmap_cache_bypass(map, enable); 497 497 map->unlock(map->lock_arg); 498 498 } 499 
499 EXPORT_SYMBOL_GPL(regcache_cache_bypass);

+14 -18

drivers/base/regmap/regmap.c

··· 1281 1281 if (map->async && map->bus->async_write) { 1282 1282 struct regmap_async *async; 1283 1283 1284 - trace_regmap_async_write_start(map->dev, reg, val_len); 1284 + trace_regmap_async_write_start(map, reg, val_len); 1285 1285 1286 1286 spin_lock_irqsave(&map->async_lock, flags); 1287 1287 async = list_first_entry_or_null(&map->async_free, ··· 1339 1339 return ret; 1340 1340 } 1341 1341 1342 - trace_regmap_hw_write_start(map->dev, reg, 1343 - val_len / map->format.val_bytes); 1342 + trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes); 1344 1343 1345 1344 /* If we're doing a single register write we can probably just 1346 1345 * send the work_buf directly, otherwise try to do a gather ··· 1371 1372 kfree(buf); 1372 1373 } 1373 1374 1374 - trace_regmap_hw_write_done(map->dev, reg, 1375 - val_len / map->format.val_bytes); 1375 + trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); 1376 1376 1377 1377 return ret; 1378 1378 } ··· 1405 1407 1406 1408 map->format.format_write(map, reg, val); 1407 1409 1408 - trace_regmap_hw_write_start(map->dev, reg, 1); 1410 + trace_regmap_hw_write_start(map, reg, 1); 1409 1411 1410 1412 ret = map->bus->write(map->bus_context, map->work_buf, 1411 1413 map->format.buf_size); 1412 1414 1413 - trace_regmap_hw_write_done(map->dev, reg, 1); 1415 + trace_regmap_hw_write_done(map, reg, 1); 1414 1416 1415 1417 return ret; 1416 1418 } ··· 1468 1470 dev_info(map->dev, "%x <= %x\n", reg, val); 1469 1471 #endif 1470 1472 1471 - trace_regmap_reg_write(map->dev, reg, val); 1473 + trace_regmap_reg_write(map, reg, val); 1472 1474 1473 1475 return map->reg_write(context, reg, val); 1474 1476 } ··· 1771 1773 for (i = 0; i < num_regs; i++) { 1772 1774 int reg = regs[i].reg; 1773 1775 int val = regs[i].def; 1774 - trace_regmap_hw_write_start(map->dev, reg, 1); 1776 + trace_regmap_hw_write_start(map, reg, 1); 1775 1777 map->format.format_reg(u8, reg, map->reg_shift); 1776 1778 u8 += reg_bytes + pad_bytes; 1777 
1779 map->format.format_val(u8, val, 0); ··· 1786 1788 1787 1789 for (i = 0; i < num_regs; i++) { 1788 1790 int reg = regs[i].reg; 1789 - trace_regmap_hw_write_done(map->dev, reg, 1); 1791 + trace_regmap_hw_write_done(map, reg, 1); 1790 1792 } 1791 1793 return ret; 1792 1794 } ··· 2057 2059 */ 2058 2060 u8[0] |= map->read_flag_mask; 2059 2061 2060 - trace_regmap_hw_read_start(map->dev, reg, 2061 - val_len / map->format.val_bytes); 2062 + trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); 2062 2063 2063 2064 ret = map->bus->read(map->bus_context, map->work_buf, 2064 2065 map->format.reg_bytes + map->format.pad_bytes, 2065 2066 val, val_len); 2066 2067 2067 - trace_regmap_hw_read_done(map->dev, reg, 2068 - val_len / map->format.val_bytes); 2068 + trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); 2069 2069 2070 2070 return ret; 2071 2071 } ··· 2119 2123 dev_info(map->dev, "%x => %x\n", reg, *val); 2120 2124 #endif 2121 2125 2122 - trace_regmap_reg_read(map->dev, reg, *val); 2126 + trace_regmap_reg_read(map, reg, *val); 2123 2127 2124 2128 if (!map->cache_bypass) 2125 2129 regcache_write(map, reg, *val); ··· 2476 2480 struct regmap *map = async->map; 2477 2481 bool wake; 2478 2482 2479 - trace_regmap_async_io_complete(map->dev); 2483 + trace_regmap_async_io_complete(map); 2480 2484 2481 2485 spin_lock(&map->async_lock); 2482 2486 list_move(&async->list, &map->async_free); ··· 2521 2525 if (!map->bus || !map->bus->async_write) 2522 2526 return 0; 2523 2527 2524 - trace_regmap_async_complete_start(map->dev); 2528 + trace_regmap_async_complete_start(map); 2525 2529 2526 2530 wait_event(map->async_waitq, regmap_async_is_done(map)); 2527 2531 ··· 2530 2534 map->async_ret = 0; 2531 2535 spin_unlock_irqrestore(&map->async_lock, flags); 2532 2536 2533 - trace_regmap_async_complete_done(map->dev); 2537 + trace_regmap_async_complete_done(map); 2534 2538 2535 2539 return ret; 2536 2540 }

+4 -4

drivers/block/nbd.c

··· 803 803 return -EINVAL; 804 804 } 805 805 806 - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); 807 - if (!nbd_dev) 808 - return -ENOMEM; 809 - 810 806 part_shift = 0; 811 807 if (max_part > 0) { 812 808 part_shift = fls(max_part); ··· 823 827 824 828 if (nbds_max > 1UL << (MINORBITS - part_shift)) 825 829 return -EINVAL; 830 + 831 + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); 832 + if (!nbd_dev) 833 + return -ENOMEM; 826 834 827 835 for (i = 0; i < nbds_max; i++) { 828 836 struct gendisk *disk = alloc_disk(1 << part_shift);

+1

drivers/block/nvme-core.c

··· 3003 3003 } 3004 3004 get_device(dev->device); 3005 3005 3006 + INIT_LIST_HEAD(&dev->node); 3006 3007 INIT_WORK(&dev->probe_work, nvme_async_probe); 3007 3008 schedule_work(&dev->probe_work); 3008 3009 return 0;

+1 -1

drivers/clocksource/em_sti.c

··· 210 210 211 211 ret = em_sti_start(p, USER_CLOCKSOURCE); 212 212 if (!ret) 213 - __clocksource_updatefreq_hz(cs, p->rate); 213 + __clocksource_update_freq_hz(cs, p->rate); 214 214 return ret; 215 215 } 216 216

+1 -1

drivers/clocksource/sh_cmt.c

··· 641 641 642 642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); 643 643 if (!ret) { 644 - __clocksource_updatefreq_hz(cs, ch->rate); 644 + __clocksource_update_freq_hz(cs, ch->rate); 645 645 ch->cs_enabled = true; 646 646 } 647 647 return ret;

+1 -1

drivers/clocksource/sh_tmu.c

··· 272 272 273 273 ret = sh_tmu_enable(ch); 274 274 if (!ret) { 275 - __clocksource_updatefreq_hz(cs, ch->rate); 275 + __clocksource_update_freq_hz(cs, ch->rate); 276 276 ch->cs_enabled = true; 277 277 } 278 278

+1 -12

drivers/gpu/drm/drm_crtc.c

··· 525 525 } 526 526 EXPORT_SYMBOL(drm_framebuffer_reference); 527 527 528 - static void drm_framebuffer_free_bug(struct kref *kref) 529 - { 530 - BUG(); 531 - } 532 - 533 - static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) 534 - { 535 - DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount)); 536 - kref_put(&fb->refcount, drm_framebuffer_free_bug); 537 - } 538 - 539 528 /** 540 529 * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr 541 530 * @fb: fb to unregister ··· 1309 1320 return; 1310 1321 } 1311 1322 /* disconnect the plane from the fb and crtc: */ 1312 - __drm_framebuffer_unreference(plane->old_fb); 1323 + drm_framebuffer_unreference(plane->old_fb); 1313 1324 plane->old_fb = NULL; 1314 1325 plane->fb = NULL; 1315 1326 plane->crtc = NULL;

+21 -17

drivers/gpu/drm/i915/i915_gem.c

··· 2737 2737 2738 2738 WARN_ON(i915_verify_lists(ring->dev)); 2739 2739 2740 - /* Move any buffers on the active list that are no longer referenced 2741 - * by the ringbuffer to the flushing/inactive lists as appropriate, 2742 - * before we free the context associated with the requests. 2740 + /* Retire requests first as we use it above for the early return. 2741 + * If we retire requests last, we may use a later seqno and so clear 2742 + * the requests lists without clearing the active list, leading to 2743 + * confusion. 2743 2744 */ 2744 - while (!list_empty(&ring->active_list)) { 2745 - struct drm_i915_gem_object *obj; 2746 - 2747 - obj = list_first_entry(&ring->active_list, 2748 - struct drm_i915_gem_object, 2749 - ring_list); 2750 - 2751 - if (!i915_gem_request_completed(obj->last_read_req, true)) 2752 - break; 2753 - 2754 - i915_gem_object_move_to_inactive(obj); 2755 - } 2756 - 2757 - 2758 2745 while (!list_empty(&ring->request_list)) { 2759 2746 struct drm_i915_gem_request *request; 2760 2747 struct intel_ringbuffer *ringbuf; ··· 2774 2787 ringbuf->last_retired_head = request->postfix; 2775 2788 2776 2789 i915_gem_free_request(request); 2790 + } 2791 + 2792 + /* Move any buffers on the active list that are no longer referenced 2793 + * by the ringbuffer to the flushing/inactive lists as appropriate, 2794 + * before we free the context associated with the requests. 2795 + */ 2796 + while (!list_empty(&ring->active_list)) { 2797 + struct drm_i915_gem_object *obj; 2798 + 2799 + obj = list_first_entry(&ring->active_list, 2800 + struct drm_i915_gem_object, 2801 + ring_list); 2802 + 2803 + if (!i915_gem_request_completed(obj->last_read_req, true)) 2804 + break; 2805 + 2806 + i915_gem_object_move_to_inactive(obj); 2777 2807 } 2778 2808 2779 2809 if (unlikely(ring->trace_irq_req &&

+13 -5

drivers/gpu/drm/i915/intel_display.c

··· 2438 2438 if (!intel_crtc->base.primary->fb) 2439 2439 return; 2440 2440 2441 - if (intel_alloc_plane_obj(intel_crtc, plane_config)) 2441 + if (intel_alloc_plane_obj(intel_crtc, plane_config)) { 2442 + struct drm_plane *primary = intel_crtc->base.primary; 2443 + 2444 + primary->state->crtc = &intel_crtc->base; 2445 + primary->crtc = &intel_crtc->base; 2446 + update_state_fb(primary); 2447 + 2442 2448 return; 2449 + } 2443 2450 2444 2451 kfree(intel_crtc->base.primary->fb); 2445 2452 intel_crtc->base.primary->fb = NULL; ··· 2469 2462 continue; 2470 2463 2471 2464 if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { 2465 + struct drm_plane *primary = intel_crtc->base.primary; 2466 + 2472 2467 if (obj->tiling_mode != I915_TILING_NONE) 2473 2468 dev_priv->preserve_bios_swizzle = true; 2474 2469 2475 2470 drm_framebuffer_reference(c->primary->fb); 2476 - intel_crtc->base.primary->fb = c->primary->fb; 2471 + primary->fb = c->primary->fb; 2472 + primary->state->crtc = &intel_crtc->base; 2473 + primary->crtc = &intel_crtc->base; 2477 2474 obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); 2478 2475 break; 2479 2476 } ··· 6674 6663 plane_config->size); 6675 6664 6676 6665 crtc->base.primary->fb = fb; 6677 - update_state_fb(crtc->base.primary); 6678 6666 } 6679 6667 6680 6668 static void chv_crtc_clock_get(struct intel_crtc *crtc, ··· 7714 7704 plane_config->size); 7715 7705 7716 7706 crtc->base.primary->fb = fb; 7717 - update_state_fb(crtc->base.primary); 7718 7707 return; 7719 7708 7720 7709 error: ··· 7807 7798 plane_config->size); 7808 7799 7809 7800 crtc->base.primary->fb = fb; 7810 - update_state_fb(crtc->base.primary); 7811 7801 } 7812 7802 7813 7803 static bool ironlake_get_pipe_config(struct intel_crtc *crtc,

+16 -10

drivers/md/dm.c

··· 433 433 434 434 dm_get(md); 435 435 atomic_inc(&md->open_count); 436 - 437 436 out: 438 437 spin_unlock(&_minor_lock); 439 438 ··· 441 442 442 443 static void dm_blk_close(struct gendisk *disk, fmode_t mode) 443 444 { 444 - struct mapped_device *md = disk->private_data; 445 + struct mapped_device *md; 445 446 446 447 spin_lock(&_minor_lock); 448 + 449 + md = disk->private_data; 450 + if (WARN_ON(!md)) 451 + goto out; 447 452 448 453 if (atomic_dec_and_test(&md->open_count) && 449 454 (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) 450 455 queue_work(deferred_remove_workqueue, &deferred_remove_work); 451 456 452 457 dm_put(md); 453 - 458 + out: 454 459 spin_unlock(&_minor_lock); 455 460 } 456 461 ··· 2244 2241 int minor = MINOR(disk_devt(md->disk)); 2245 2242 2246 2243 unlock_fs(md); 2247 - bdput(md->bdev); 2248 2244 destroy_workqueue(md->wq); 2249 2245 2250 2246 if (md->kworker_task) ··· 2254 2252 mempool_destroy(md->rq_pool); 2255 2253 if (md->bs) 2256 2254 bioset_free(md->bs); 2257 - blk_integrity_unregister(md->disk); 2258 - del_gendisk(md->disk); 2255 + 2259 2256 cleanup_srcu_struct(&md->io_barrier); 2260 2257 free_table_devices(&md->table_devices); 2261 - free_minor(minor); 2258 + dm_stats_cleanup(&md->stats); 2262 2259 2263 2260 spin_lock(&_minor_lock); 2264 2261 md->disk->private_data = NULL; 2265 2262 spin_unlock(&_minor_lock); 2266 - 2263 + if (blk_get_integrity(md->disk)) 2264 + blk_integrity_unregister(md->disk); 2265 + del_gendisk(md->disk); 2267 2266 put_disk(md->disk); 2268 2267 blk_cleanup_queue(md->queue); 2269 - dm_stats_cleanup(&md->stats); 2268 + bdput(md->bdev); 2269 + free_minor(minor); 2270 + 2270 2271 module_put(THIS_MODULE); 2271 2272 kfree(md); 2272 2273 } ··· 2647 2642 2648 2643 might_sleep(); 2649 2644 2650 - spin_lock(&_minor_lock); 2651 2645 map = dm_get_live_table(md, &srcu_idx); 2646 + 2647 + spin_lock(&_minor_lock); 2652 2648 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); 2653 2649 set_bit(DMF_FREEING, 
&md->flags); 2654 2650 spin_unlock(&_minor_lock);

+1 -1

drivers/mfd/kempld-core.c

··· 739 739 for (id = kempld_dmi_table; 740 740 id->matches[0].slot != DMI_NONE; id++) 741 741 if (strstr(id->ident, force_device_id)) 742 - if (id->callback && id->callback(id)) 742 + if (id->callback && !id->callback(id)) 743 743 break; 744 744 if (id->matches[0].slot == DMI_NONE) 745 745 return -ENODEV;

+24 -6

drivers/mfd/rtsx_usb.c

··· 196 196 int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) 197 197 { 198 198 u16 value; 199 + u8 *buf; 200 + int ret; 199 201 200 202 if (!data) 201 203 return -EINVAL; 202 - *data = 0; 204 + 205 + buf = kzalloc(sizeof(u8), GFP_KERNEL); 206 + if (!buf) 207 + return -ENOMEM; 203 208 204 209 addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; 205 210 value = swab16(addr); 206 211 207 - return usb_control_msg(ucr->pusb_dev, 212 + ret = usb_control_msg(ucr->pusb_dev, 208 213 usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, 209 214 USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 210 - value, 0, data, 1, 100); 215 + value, 0, buf, 1, 100); 216 + *data = *buf; 217 + 218 + kfree(buf); 219 + return ret; 211 220 } 212 221 EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); 213 222 ··· 297 288 int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) 298 289 { 299 290 int ret; 291 + u16 *buf; 300 292 301 293 if (!status) 302 294 return -EINVAL; 303 295 304 - if (polling_pipe == 0) 296 + if (polling_pipe == 0) { 297 + buf = kzalloc(sizeof(u16), GFP_KERNEL); 298 + if (!buf) 299 + return -ENOMEM; 300 + 305 301 ret = usb_control_msg(ucr->pusb_dev, 306 302 usb_rcvctrlpipe(ucr->pusb_dev, 0), 307 303 RTSX_USB_REQ_POLL, 308 304 USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 309 - 0, 0, status, 2, 100); 310 - else 305 + 0, 0, buf, 2, 100); 306 + *status = *buf; 307 + 308 + kfree(buf); 309 + } else { 311 310 ret = rtsx_usb_get_status_with_bulk(ucr, status); 311 + } 312 312 313 313 /* usb_control_msg may return positive when success */ 314 314 if (ret < 0)

+29 -2

drivers/net/ethernet/amd/pcnet32.c

··· 1543 1543 { 1544 1544 struct pcnet32_private *lp; 1545 1545 int i, media; 1546 - int fdx, mii, fset, dxsuflo; 1546 + int fdx, mii, fset, dxsuflo, sram; 1547 1547 int chip_version; 1548 1548 char *chipname; 1549 1549 struct net_device *dev; ··· 1580 1580 } 1581 1581 1582 1582 /* initialize variables */ 1583 - fdx = mii = fset = dxsuflo = 0; 1583 + fdx = mii = fset = dxsuflo = sram = 0; 1584 1584 chip_version = (chip_version >> 12) & 0xffff; 1585 1585 1586 1586 switch (chip_version) { ··· 1613 1613 chipname = "PCnet/FAST III 79C973"; /* PCI */ 1614 1614 fdx = 1; 1615 1615 mii = 1; 1616 + sram = 1; 1616 1617 break; 1617 1618 case 0x2626: 1618 1619 chipname = "PCnet/Home 79C978"; /* PCI */ ··· 1637 1636 chipname = "PCnet/FAST III 79C975"; /* PCI */ 1638 1637 fdx = 1; 1639 1638 mii = 1; 1639 + sram = 1; 1640 1640 break; 1641 1641 case 0x2628: 1642 1642 chipname = "PCnet/PRO 79C976"; ··· 1664 1662 a->write_csr(ioaddr, 80, 1665 1663 (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); 1666 1664 dxsuflo = 1; 1665 + } 1666 + 1667 + /* 1668 + * The Am79C973/Am79C975 controllers come with 12K of SRAM 1669 + * which we can use for the Tx/Rx buffers but most importantly, 1670 + * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid 1671 + * Tx fifo underflows. 1672 + */ 1673 + if (sram) { 1674 + /* 1675 + * The SRAM is being configured in two steps. First we 1676 + * set the SRAM size in the BCR25:SRAM_SIZE bits. According 1677 + * to the datasheet, each bit corresponds to a 512-byte 1678 + * page so we can have at most 24 pages. The SRAM_SIZE 1679 + * holds the value of the upper 8 bits of the 16-bit SRAM size. 1680 + * The low 8-bits start at 0x00 and end at 0xff. So the 1681 + * address range is from 0x0000 up to 0x17ff. Therefore, 1682 + * the SRAM_SIZE is set to 0x17. The next step is to set 1683 + * the BCR26:SRAM_BND midway through so the Tx and Rx 1684 + * buffers can share the SRAM equally. 
1685 + */ 1686 + a->write_bcr(ioaddr, 25, 0x17); 1687 + a->write_bcr(ioaddr, 26, 0xc); 1688 + /* And finally enable the NOUFLO bit */ 1689 + a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11)); 1667 1690 } 1668 1691 1669 1692 dev = alloc_etherdev(sizeof(*lp));

+2

drivers/net/ethernet/emulex/benet/be.h

··· 354 354 u16 vlan_tag; 355 355 u32 tx_rate; 356 356 u32 plink_tracking; 357 + u32 privileges; 357 358 }; 358 359 359 360 enum vf_state { ··· 424 423 425 424 u8 __iomem *csr; /* CSR BAR used only for BE2/3 */ 426 425 u8 __iomem *db; /* Door Bell */ 426 + u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */ 427 427 428 428 struct mutex mbox_lock; /* For serializing mbox cmds to BE card */ 429 429 struct be_dma_mem mbox_mem;

+7 -10

drivers/net/ethernet/emulex/benet/be_cmds.c

··· 1902 1902 { 1903 1903 int num_eqs, i = 0; 1904 1904 1905 - if (lancer_chip(adapter) && num > 8) { 1906 - while (num) { 1907 - num_eqs = min(num, 8); 1908 - __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); 1909 - i += num_eqs; 1910 - num -= num_eqs; 1911 - } 1912 - } else { 1913 - __be_cmd_modify_eqd(adapter, set_eqd, num); 1905 + while (num) { 1906 + num_eqs = min(num, 8); 1907 + __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); 1908 + i += num_eqs; 1909 + num -= num_eqs; 1914 1910 } 1915 1911 1916 1912 return 0; ··· 1914 1918 1915 1919 /* Uses synchronous mcc */ 1916 1920 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, 1917 - u32 num) 1921 + u32 num, u32 domain) 1918 1922 { 1919 1923 struct be_mcc_wrb *wrb; 1920 1924 struct be_cmd_req_vlan_config *req; ··· 1932 1936 be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, 1933 1937 OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req), 1934 1938 wrb, NULL); 1939 + req->hdr.domain = domain; 1935 1940 1936 1941 req->interface_id = if_id; 1937 1942 req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;

+1 -1

drivers/net/ethernet/emulex/benet/be_cmds.h

··· 2256 2256 int be_cmd_get_fw_ver(struct be_adapter *adapter); 2257 2257 int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num); 2258 2258 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, 2259 - u32 num); 2259 + u32 num, u32 domain); 2260 2260 int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status); 2261 2261 int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc); 2262 2262 int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc);

+98 -33

drivers/net/ethernet/emulex/benet/be_main.c

··· 1171 1171 for_each_set_bit(i, adapter->vids, VLAN_N_VID) 1172 1172 vids[num++] = cpu_to_le16(i); 1173 1173 1174 - status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num); 1174 + status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0); 1175 1175 if (status) { 1176 1176 dev_err(dev, "Setting HW VLAN filtering failed\n"); 1177 1177 /* Set to VLAN promisc mode as setting VLAN filter failed */ ··· 1380 1380 return 0; 1381 1381 } 1382 1382 1383 + static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) 1384 + { 1385 + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; 1386 + u16 vids[BE_NUM_VLANS_SUPPORTED]; 1387 + int vf_if_id = vf_cfg->if_handle; 1388 + int status; 1389 + 1390 + /* Enable Transparent VLAN Tagging */ 1391 + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); 1392 + if (status) 1393 + return status; 1394 + 1395 + /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */ 1396 + vids[0] = 0; 1397 + status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1); 1398 + if (!status) 1399 + dev_info(&adapter->pdev->dev, 1400 + "Cleared guest VLANs on VF%d", vf); 1401 + 1402 + /* After TVT is enabled, disallow VFs to program VLAN filters */ 1403 + if (vf_cfg->privileges & BE_PRIV_FILTMGMT) { 1404 + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges & 1405 + ~BE_PRIV_FILTMGMT, vf + 1); 1406 + if (!status) 1407 + vf_cfg->privileges &= ~BE_PRIV_FILTMGMT; 1408 + } 1409 + return 0; 1410 + } 1411 + 1412 + static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) 1413 + { 1414 + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; 1415 + struct device *dev = &adapter->pdev->dev; 1416 + int status; 1417 + 1418 + /* Reset Transparent VLAN Tagging. 
*/ 1419 + status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, 1420 + vf_cfg->if_handle, 0); 1421 + if (status) 1422 + return status; 1423 + 1424 + /* Allow VFs to program VLAN filtering */ 1425 + if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { 1426 + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges | 1427 + BE_PRIV_FILTMGMT, vf + 1); 1428 + if (!status) { 1429 + vf_cfg->privileges |= BE_PRIV_FILTMGMT; 1430 + dev_info(dev, "VF%d: FILTMGMT priv enabled", vf); 1431 + } 1432 + } 1433 + 1434 + dev_info(dev, 1435 + "Disable/re-enable i/f in VM to clear Transparent VLAN tag"); 1436 + return 0; 1437 + } 1438 + 1383 1439 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) 1384 1440 { 1385 1441 struct be_adapter *adapter = netdev_priv(netdev); 1386 1442 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; 1387 - int status = 0; 1443 + int status; 1388 1444 1389 1445 if (!sriov_enabled(adapter)) 1390 1446 return -EPERM; ··· 1450 1394 1451 1395 if (vlan || qos) { 1452 1396 vlan |= qos << VLAN_PRIO_SHIFT; 1453 - if (vf_cfg->vlan_tag != vlan) 1454 - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, 1455 - vf_cfg->if_handle, 0); 1397 + status = be_set_vf_tvt(adapter, vf, vlan); 1456 1398 } else { 1457 - /* Reset Transparent Vlan Tagging. 
*/ 1458 - status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, 1459 - vf + 1, vf_cfg->if_handle, 0); 1399 + status = be_clear_vf_tvt(adapter, vf); 1460 1400 } 1461 1401 1462 1402 if (status) { 1463 1403 dev_err(&adapter->pdev->dev, 1464 - "VLAN %d config on VF %d failed : %#x\n", vlan, 1465 - vf, status); 1404 + "VLAN %d config on VF %d failed : %#x\n", vlan, vf, 1405 + status); 1466 1406 return be_cmd_status(status); 1467 1407 } 1468 1408 1469 1409 vf_cfg->vlan_tag = vlan; 1470 - 1471 1410 return 0; 1472 1411 } 1473 1412 ··· 2823 2772 } 2824 2773 } 2825 2774 } else { 2826 - pci_read_config_dword(adapter->pdev, 2827 - PCICFG_UE_STATUS_LOW, &ue_lo); 2828 - pci_read_config_dword(adapter->pdev, 2829 - PCICFG_UE_STATUS_HIGH, &ue_hi); 2830 - pci_read_config_dword(adapter->pdev, 2831 - PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask); 2832 - pci_read_config_dword(adapter->pdev, 2833 - PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask); 2775 + ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW); 2776 + ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH); 2777 + ue_lo_mask = ioread32(adapter->pcicfg + 2778 + PCICFG_UE_STATUS_LOW_MASK); 2779 + ue_hi_mask = ioread32(adapter->pcicfg + 2780 + PCICFG_UE_STATUS_HI_MASK); 2834 2781 2835 2782 ue_lo = (ue_lo & ~ue_lo_mask); 2836 2783 ue_hi = (ue_hi & ~ue_hi_mask); ··· 3388 3339 u32 cap_flags, u32 vf) 3389 3340 { 3390 3341 u32 en_flags; 3391 - int status; 3392 3342 3393 3343 en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | 3394 3344 BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | ··· 3395 3347 3396 3348 en_flags &= cap_flags; 3397 3349 3398 - status = be_cmd_if_create(adapter, cap_flags, en_flags, 3399 - if_handle, vf); 3400 - 3401 - return status; 3350 + return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf); 3402 3351 } 3403 3352 3404 3353 static int be_vfs_if_create(struct be_adapter *adapter) ··· 3413 3368 if (!BE3_chip(adapter)) { 3414 3369 status = be_cmd_get_profile_config(adapter, &res, 3415 3370 
vf + 1); 3416 - if (!status) 3371 + if (!status) { 3417 3372 cap_flags = res.if_cap_flags; 3373 + /* Prevent VFs from enabling VLAN promiscuous 3374 + * mode 3375 + */ 3376 + cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS; 3377 + } 3418 3378 } 3419 3379 3420 3380 status = be_if_create(adapter, &vf_cfg->if_handle, ··· 3453 3403 struct device *dev = &adapter->pdev->dev; 3454 3404 struct be_vf_cfg *vf_cfg; 3455 3405 int status, old_vfs, vf; 3456 - u32 privileges; 3457 3406 3458 3407 old_vfs = pci_num_vf(adapter->pdev); 3459 3408 ··· 3482 3433 3483 3434 for_all_vfs(adapter, vf_cfg, vf) { 3484 3435 /* Allow VFs to programs MAC/VLAN filters */ 3485 - status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1); 3486 - if (!status && !(privileges & BE_PRIV_FILTMGMT)) { 3436 + status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges, 3437 + vf + 1); 3438 + if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { 3487 3439 status = be_cmd_set_fn_privileges(adapter, 3488 - privileges | 3440 + vf_cfg->privileges | 3489 3441 BE_PRIV_FILTMGMT, 3490 3442 vf + 1); 3491 - if (!status) 3443 + if (!status) { 3444 + vf_cfg->privileges |= BE_PRIV_FILTMGMT; 3492 3445 dev_info(dev, "VF%d has FILTMGMT privilege\n", 3493 3446 vf); 3447 + } 3494 3448 } 3495 3449 3496 3450 /* Allow full available bandwidth */ ··· 4872 4820 4873 4821 static int be_map_pci_bars(struct be_adapter *adapter) 4874 4822 { 4823 + struct pci_dev *pdev = adapter->pdev; 4875 4824 u8 __iomem *addr; 4876 4825 4877 4826 if (BEx_chip(adapter) && be_physfn(adapter)) { 4878 - adapter->csr = pci_iomap(adapter->pdev, 2, 0); 4827 + adapter->csr = pci_iomap(pdev, 2, 0); 4879 4828 if (!adapter->csr) 4880 4829 return -ENOMEM; 4881 4830 } 4882 4831 4883 - addr = pci_iomap(adapter->pdev, db_bar(adapter), 0); 4832 + addr = pci_iomap(pdev, db_bar(adapter), 0); 4884 4833 if (!addr) 4885 4834 goto pci_map_err; 4886 4835 adapter->db = addr; 4836 + 4837 + if (skyhawk_chip(adapter) || BEx_chip(adapter)) { 4838 + if 
(be_physfn(adapter)) { 4839 + /* PCICFG is the 2nd BAR in BE2 */ 4840 + addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0); 4841 + if (!addr) 4842 + goto pci_map_err; 4843 + adapter->pcicfg = addr; 4844 + } else { 4845 + adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET; 4846 + } 4847 + } 4887 4848 4888 4849 be_roce_map_pci_bars(adapter); 4889 4850 return 0; 4890 4851 4891 4852 pci_map_err: 4892 - dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n"); 4853 + dev_err(&pdev->dev, "Error in mapping PCI BARs\n"); 4893 4854 be_unmap_pci_bars(adapter); 4894 4855 return -ENOMEM; 4895 4856 }

+24 -6

drivers/net/usb/cx82310_eth.c

··· 46 46 }; 47 47 48 48 #define CMD_PACKET_SIZE 64 49 - /* first command after power on can take around 8 seconds */ 50 - #define CMD_TIMEOUT 15000 49 + #define CMD_TIMEOUT 100 51 50 #define CMD_REPLY_RETRY 5 52 51 53 52 #define CX82310_MTU 1514 ··· 77 78 ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf, 78 79 CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); 79 80 if (ret < 0) { 80 - dev_err(&dev->udev->dev, "send command %#x: error %d\n", 81 - cmd, ret); 81 + if (cmd != CMD_GET_LINK_STATUS) 82 + dev_err(&dev->udev->dev, "send command %#x: error %d\n", 83 + cmd, ret); 82 84 goto end; 83 85 } 84 86 ··· 90 90 buf, CMD_PACKET_SIZE, &actual_len, 91 91 CMD_TIMEOUT); 92 92 if (ret < 0) { 93 - dev_err(&dev->udev->dev, 94 - "reply receive error %d\n", ret); 93 + if (cmd != CMD_GET_LINK_STATUS) 94 + dev_err(&dev->udev->dev, 95 + "reply receive error %d\n", 96 + ret); 95 97 goto end; 96 98 } 97 99 if (actual_len > 0) ··· 136 134 int ret; 137 135 char buf[15]; 138 136 struct usb_device *udev = dev->udev; 137 + u8 link[3]; 138 + int timeout = 50; 139 139 140 140 /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */ 141 141 if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0 ··· 163 159 dev->partial_data = (unsigned long) kmalloc(dev->hard_mtu, GFP_KERNEL); 164 160 if (!dev->partial_data) 165 161 return -ENOMEM; 162 + 163 + /* wait for firmware to become ready (indicated by the link being up) */ 164 + while (--timeout) { 165 + ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0, 166 + link, sizeof(link)); 167 + /* the command can time out during boot - it's not an error */ 168 + if (!ret && link[0] == 1 && link[2] == 1) 169 + break; 170 + msleep(500); 171 + }; 172 + if (!timeout) { 173 + dev_err(&udev->dev, "firmware not ready in time\n"); 174 + return -ETIMEDOUT; 175 + } 166 176 167 177 /* enable ethernet mode (?) */ 168 178 ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0);

+4

drivers/regulator/palmas-regulator.c

··· 1572 1572 if (!pmic) 1573 1573 return -ENOMEM; 1574 1574 1575 + if (of_device_is_compatible(node, "ti,tps659038-pmic")) 1576 + palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr = 1577 + TPS659038_REGEN2_CTRL; 1578 + 1575 1579 pmic->dev = &pdev->dev; 1576 1580 pmic->palmas = palmas; 1577 1581 palmas->pmic = pmic;

+9 -8

drivers/rtc/rtc-mrst.c

··· 413 413 mrst->dev = NULL; 414 414 } 415 415 416 - #ifdef CONFIG_PM 417 - static int mrst_suspend(struct device *dev, pm_message_t mesg) 416 + #ifdef CONFIG_PM_SLEEP 417 + static int mrst_suspend(struct device *dev) 418 418 { 419 419 struct mrst_rtc *mrst = dev_get_drvdata(dev); 420 420 unsigned char tmp; ··· 453 453 */ 454 454 static inline int mrst_poweroff(struct device *dev) 455 455 { 456 - return mrst_suspend(dev, PMSG_HIBERNATE); 456 + return mrst_suspend(dev); 457 457 } 458 458 459 459 static int mrst_resume(struct device *dev) ··· 490 490 return 0; 491 491 } 492 492 493 + static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); 494 + #define MRST_PM_OPS (&mrst_pm_ops) 495 + 493 496 #else 494 - #define mrst_suspend NULL 495 - #define mrst_resume NULL 497 + #define MRST_PM_OPS NULL 496 498 497 499 static inline int mrst_poweroff(struct device *dev) 498 500 { ··· 531 529 .remove = vrtc_mrst_platform_remove, 532 530 .shutdown = vrtc_mrst_platform_shutdown, 533 531 .driver = { 534 - .name = (char *) driver_name, 535 - .suspend = mrst_suspend, 536 - .resume = mrst_resume, 532 + .name = driver_name, 533 + .pm = MRST_PM_OPS, 537 534 } 538 535 }; 539 536

+2 -1

drivers/scsi/ipr.c

··· 6815 6815 }; 6816 6816 6817 6817 static struct ata_port_info sata_port_info = { 6818 - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, 6818 + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | 6819 + ATA_FLAG_SAS_HOST, 6819 6820 .pio_mask = ATA_PIO4_ONLY, 6820 6821 .mwdma_mask = ATA_MWDMA2, 6821 6822 .udma_mask = ATA_UDMA6,

+2 -1

drivers/scsi/libsas/sas_ata.c

··· 547 547 }; 548 548 549 549 static struct ata_port_info sata_port_info = { 550 - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, 550 + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | 551 + ATA_FLAG_SAS_HOST, 551 552 .pio_mask = ATA_PIO4, 552 553 .mwdma_mask = ATA_MWDMA2, 553 554 .udma_mask = ATA_UDMA6,

+4 -2

drivers/spi/spi-dw-mid.c

··· 108 108 { 109 109 struct dw_spi *dws = arg; 110 110 111 - if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY)) 111 + clear_bit(TX_BUSY, &dws->dma_chan_busy); 112 + if (test_bit(RX_BUSY, &dws->dma_chan_busy)) 112 113 return; 113 114 dw_spi_xfer_done(dws); 114 115 } ··· 157 156 { 158 157 struct dw_spi *dws = arg; 159 158 160 - if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY)) 159 + clear_bit(RX_BUSY, &dws->dma_chan_busy); 160 + if (test_bit(TX_BUSY, &dws->dma_chan_busy)) 161 161 return; 162 162 dw_spi_xfer_done(dws); 163 163 }

+5 -4

drivers/spi/spi-qup.c

··· 498 498 struct resource *res; 499 499 struct device *dev; 500 500 void __iomem *base; 501 - u32 max_freq, iomode; 501 + u32 max_freq, iomode, num_cs; 502 502 int ret, irq, size; 503 503 504 504 dev = &pdev->dev; ··· 550 550 } 551 551 552 552 /* use num-cs unless not present or out of range */ 553 - if (of_property_read_u16(dev->of_node, "num-cs", 554 - &master->num_chipselect) || 555 - (master->num_chipselect > SPI_NUM_CHIPSELECTS)) 553 + if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) || 554 + num_cs > SPI_NUM_CHIPSELECTS) 556 555 master->num_chipselect = SPI_NUM_CHIPSELECTS; 556 + else 557 + master->num_chipselect = num_cs; 557 558 558 559 master->bus_num = pdev->id; 559 560 master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;

+3 -2

drivers/spi/spi.c

··· 1105 1105 "failed to unprepare message: %d\n", ret); 1106 1106 } 1107 1107 } 1108 + 1109 + trace_spi_message_done(mesg); 1110 + 1108 1111 master->cur_msg_prepared = false; 1109 1112 1110 1113 mesg->state = NULL; 1111 1114 if (mesg->complete) 1112 1115 mesg->complete(mesg->context); 1113 - 1114 - trace_spi_message_done(mesg); 1115 1116 } 1116 1117 EXPORT_SYMBOL_GPL(spi_finalize_current_message); 1117 1118

+12 -7

fs/affs/file.c

··· 699 699 boff = tmp % bsize; 700 700 if (boff) { 701 701 bh = affs_bread_ino(inode, bidx, 0); 702 - if (IS_ERR(bh)) 703 - return PTR_ERR(bh); 702 + if (IS_ERR(bh)) { 703 + written = PTR_ERR(bh); 704 + goto err_first_bh; 705 + } 704 706 tmp = min(bsize - boff, to - from); 705 707 BUG_ON(boff + tmp > bsize || tmp > bsize); 706 708 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); ··· 714 712 bidx++; 715 713 } else if (bidx) { 716 714 bh = affs_bread_ino(inode, bidx - 1, 0); 717 - if (IS_ERR(bh)) 718 - return PTR_ERR(bh); 715 + if (IS_ERR(bh)) { 716 + written = PTR_ERR(bh); 717 + goto err_first_bh; 718 + } 719 719 } 720 720 while (from + bsize <= to) { 721 721 prev_bh = bh; 722 722 bh = affs_getemptyblk_ino(inode, bidx); 723 723 if (IS_ERR(bh)) 724 - goto out; 724 + goto err_bh; 725 725 memcpy(AFFS_DATA(bh), data + from, bsize); 726 726 if (buffer_new(bh)) { 727 727 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); ··· 755 751 prev_bh = bh; 756 752 bh = affs_bread_ino(inode, bidx, 1); 757 753 if (IS_ERR(bh)) 758 - goto out; 754 + goto err_bh; 759 755 tmp = min(bsize, to - from); 760 756 BUG_ON(tmp > bsize); 761 757 memcpy(AFFS_DATA(bh), data + from, tmp); ··· 794 790 if (tmp > inode->i_size) 795 791 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 796 792 793 + err_first_bh: 797 794 unlock_page(page); 798 795 page_cache_release(page); 799 796 800 797 return written; 801 798 802 - out: 799 + err_bh: 803 800 bh = prev_bh; 804 801 if (!written) 805 802 written = PTR_ERR(bh);

+11 -9

fs/hfsplus/brec.c

··· 131 131 hfs_bnode_write(node, entry, data_off + key_len, entry_len); 132 132 hfs_bnode_dump(node); 133 133 134 - if (new_node) { 135 - /* update parent key if we inserted a key 136 - * at the start of the first node 137 - */ 138 - if (!rec && new_node != node) 139 - hfs_brec_update_parent(fd); 134 + /* 135 + * update parent key if we inserted a key 136 + * at the start of the node and it is not the new node 137 + */ 138 + if (!rec && new_node != node) { 139 + hfs_bnode_read_key(node, fd->search_key, data_off + size); 140 + hfs_brec_update_parent(fd); 141 + } 140 142 143 + if (new_node) { 141 144 hfs_bnode_put(fd->bnode); 142 145 if (!new_node->parent) { 143 146 hfs_btree_inc_height(tree); ··· 170 167 } 171 168 goto again; 172 169 } 173 - 174 - if (!rec) 175 - hfs_brec_update_parent(fd); 176 170 177 171 return 0; 178 172 } ··· 370 370 if (IS_ERR(parent)) 371 371 return PTR_ERR(parent); 372 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key); 373 + if (fd->record < 0) 374 + return -ENOENT; 373 375 hfs_bnode_dump(parent); 374 376 rec = fd->record; 375 377

+19 -2

include/linux/clockchips.h

··· 39 39 CLOCK_EVT_MODE_PERIODIC, 40 40 CLOCK_EVT_MODE_ONESHOT, 41 41 CLOCK_EVT_MODE_RESUME, 42 + 43 + /* Legacy ->set_mode() callback doesn't support below modes */ 42 44 }; 43 45 44 46 /* ··· 83 81 * @mode: operating mode assigned by the management code 84 82 * @features: features 85 83 * @retries: number of forced programming retries 86 - * @set_mode: set mode function 84 + * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 85 + * @set_mode_periodic: switch mode to periodic, if !set_mode 86 + * @set_mode_oneshot: switch mode to oneshot, if !set_mode 87 + * @set_mode_shutdown: switch mode to shutdown, if !set_mode 88 + * @set_mode_resume: resume clkevt device, if !set_mode 87 89 * @broadcast: function to broadcast events 88 90 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration 89 91 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration ··· 114 108 unsigned int features; 115 109 unsigned long retries; 116 110 117 - void (*broadcast)(const struct cpumask *mask); 111 + /* 112 + * Mode transition callback(s): Only one of the two groups should be 113 + * defined: 114 + * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. 115 + * - set_mode_{shutdown|periodic|oneshot|resume}(). 116 + */ 118 117 void (*set_mode)(enum clock_event_mode mode, 119 118 struct clock_event_device *); 119 + int (*set_mode_periodic)(struct clock_event_device *); 120 + int (*set_mode_oneshot)(struct clock_event_device *); 121 + int (*set_mode_shutdown)(struct clock_event_device *); 122 + int (*set_mode_resume)(struct clock_event_device *); 123 + 124 + void (*broadcast)(const struct cpumask *mask); 120 125 void (*suspend)(struct clock_event_device *); 121 126 void (*resume)(struct clock_event_device *); 122 127 unsigned long min_delta_ticks;

+17 -8

include/linux/clocksource.h

··· 56 56 * @shift: cycle to nanosecond divisor (power of two) 57 57 * @max_idle_ns: max idle time permitted by the clocksource (nsecs) 58 58 * @maxadj: maximum adjustment value to mult (~11%) 59 + * @max_cycles: maximum safe cycle value which won't overflow on multiplication 59 60 * @flags: flags describing special properties 60 61 * @archdata: arch-specific data 61 62 * @suspend: suspend function for the clocksource, if necessary ··· 77 76 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA 78 77 struct arch_clocksource_data archdata; 79 78 #endif 80 - 79 + u64 max_cycles; 81 80 const char *name; 82 81 struct list_head list; 83 82 int rating; ··· 179 178 } 180 179 181 180 182 - extern int clocksource_register(struct clocksource*); 183 181 extern int clocksource_unregister(struct clocksource*); 184 182 extern void clocksource_touch_watchdog(void); 185 183 extern struct clocksource* clocksource_get_next(void); ··· 189 189 extern void clocksource_mark_unstable(struct clocksource *cs); 190 190 191 191 extern u64 192 - clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); 192 + clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); 193 193 extern void 194 194 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); 195 195 ··· 200 200 extern int 201 201 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); 202 202 extern void 203 - __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); 203 + __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq); 204 + 205 + /* 206 + * Don't call this unless you are a default clocksource 207 + * (AKA: jiffies) and absolutely have to. 
208 + */ 209 + static inline int __clocksource_register(struct clocksource *cs) 210 + { 211 + return __clocksource_register_scale(cs, 1, 0); 212 + } 204 213 205 214 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) 206 215 { ··· 221 212 return __clocksource_register_scale(cs, 1000, khz); 222 213 } 223 214 224 - static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) 215 + static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz) 225 216 { 226 - __clocksource_updatefreq_scale(cs, 1, hz); 217 + __clocksource_update_freq_scale(cs, 1, hz); 227 218 } 228 219 229 - static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) 220 + static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz) 230 221 { 231 - __clocksource_updatefreq_scale(cs, 1000, khz); 222 + __clocksource_update_freq_scale(cs, 1000, khz); 232 223 } 233 224 234 225

+1

include/linux/libata.h

··· 232 232 * led */ 233 233 ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ 234 234 ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ 235 + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ 235 236 236 237 /* bits 24:31 of ap->flags are reserved for LLD specific flags */ 237 238

+3

include/linux/mfd/palmas.h

··· 2999 2999 #define PALMAS_GPADC_TRIM15 0x0E 3000 3000 #define PALMAS_GPADC_TRIM16 0x0F 3001 3001 3002 + /* TPS659038 regen2_ctrl offset is different from palmas */ 3003 + #define TPS659038_REGEN2_CTRL 0x12 3004 + 3002 3005 /* TPS65917 Interrupt registers */ 3003 3006 3004 3007 /* Registers for function INTERRUPT */

+2

include/linux/perf_event.h

··· 173 173 * pmu::capabilities flags 174 174 */ 175 175 #define PERF_PMU_CAP_NO_INTERRUPT 0x01 176 + #define PERF_PMU_CAP_NO_NMI 0x02 176 177 177 178 /** 178 179 * struct pmu - generic performance monitoring unit ··· 458 457 struct pid_namespace *ns; 459 458 u64 id; 460 459 460 + u64 (*clock)(void); 461 461 perf_overflow_handler_t overflow_handler; 462 462 void *overflow_handler_context; 463 463

+1 -1

include/linux/regulator/driver.h

··· 316 316 * @driver_data: private regulator data 317 317 * @of_node: OpenFirmware node to parse for device tree bindings (may be 318 318 * NULL). 319 - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is 319 + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is 320 320 * insufficient. 321 321 * @ena_gpio_initialized: GPIO controlling regulator enable was properly 322 322 * initialized, meaning that >= 0 is a valid gpio

+5 -4

include/linux/sched.h

··· 1625 1625 1626 1626 /* 1627 1627 * numa_faults_locality tracks if faults recorded during the last 1628 - * scan window were remote/local. The task scan period is adapted 1629 - * based on the locality of the faults with different weights 1630 - * depending on whether they were shared or private faults 1628 + * scan window were remote/local or failed to migrate. The task scan 1629 + * period is adapted based on the locality of the faults with different 1630 + * weights depending on whether they were shared or private faults 1631 1631 */ 1632 - unsigned long numa_faults_locality[2]; 1632 + unsigned long numa_faults_locality[3]; 1633 1633 1634 1634 unsigned long numa_pages_migrated; 1635 1635 #endif /* CONFIG_NUMA_BALANCING */ ··· 1719 1719 #define TNF_NO_GROUP 0x02 1720 1720 #define TNF_SHARED 0x04 1721 1721 #define TNF_FAULT_LOCAL 0x08 1722 + #define TNF_MIGRATE_FAIL 0x10 1722 1723 1723 1724 #ifdef CONFIG_NUMA_BALANCING 1724 1725 extern void task_numa_fault(int last_node, int node, int pages, int flags);

+8 -8

include/linux/timekeeper_internal.h

··· 16 16 * @read: Read function of @clock 17 17 * @mask: Bitmask for two's complement subtraction of non 64bit clocks 18 18 * @cycle_last: @clock cycle value at last update 19 - * @mult: NTP adjusted multiplier for scaled math conversion 19 + * @mult: (NTP adjusted) multiplier for scaled math conversion 20 20 * @shift: Shift value for scaled math conversion 21 21 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout 22 - * @base_mono: ktime_t (nanoseconds) base time for readout 22 + * @base: ktime_t (nanoseconds) base time for readout 23 23 * 24 24 * This struct has size 56 byte on 64 bit. Together with a seqcount it 25 25 * occupies a single 64byte cache line. 26 26 * 27 27 * The struct is separate from struct timekeeper as it is also used 28 - * for a fast NMI safe accessor to clock monotonic. 28 + * for a fast NMI safe accessors. 29 29 */ 30 30 struct tk_read_base { 31 31 struct clocksource *clock; ··· 35 35 u32 mult; 36 36 u32 shift; 37 37 u64 xtime_nsec; 38 - ktime_t base_mono; 38 + ktime_t base; 39 39 }; 40 40 41 41 /** 42 42 * struct timekeeper - Structure holding internal timekeeping values. 43 - * @tkr: The readout base structure 43 + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC 44 + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW 44 45 * @xtime_sec: Current CLOCK_REALTIME time in seconds 45 46 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds 46 47 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset ··· 49 48 * @offs_boot: Offset clock monotonic -> clock boottime 50 49 * @offs_tai: Offset clock monotonic -> clock tai 51 50 * @tai_offset: The current UTC to TAI offset in seconds 52 - * @base_raw: Monotonic raw base time in ktime_t format 53 51 * @raw_time: Monotonic raw base time in timespec64 format 54 52 * @cycle_interval: Number of clock cycles in one NTP interval 55 53 * @xtime_interval: Number of clock shifted nano seconds in one NTP ··· 76 76 * used instead. 
77 77 */ 78 78 struct timekeeper { 79 - struct tk_read_base tkr; 79 + struct tk_read_base tkr_mono; 80 + struct tk_read_base tkr_raw; 80 81 u64 xtime_sec; 81 82 unsigned long ktime_sec; 82 83 struct timespec64 wall_to_monotonic; ··· 85 84 ktime_t offs_boot; 86 85 ktime_t offs_tai; 87 86 s32 tai_offset; 88 - ktime_t base_raw; 89 87 struct timespec64 raw_time; 90 88 91 89 /* The following members are for timekeeping internal use */

+6

include/linux/timekeeping.h

··· 214 214 return ktime_to_ns(ktime_get_boottime()); 215 215 } 216 216 217 + static inline u64 ktime_get_tai_ns(void) 218 + { 219 + return ktime_to_ns(ktime_get_clocktai()); 220 + } 221 + 217 222 static inline u64 ktime_get_raw_ns(void) 218 223 { 219 224 return ktime_to_ns(ktime_get_raw()); 220 225 } 221 226 222 227 extern u64 ktime_get_mono_fast_ns(void); 228 + extern u64 ktime_get_raw_fast_ns(void); 223 229 224 230 /* 225 231 * Timespec interfaces utilizing the ktime based ones

+10

include/net/netfilter/nf_log.h

··· 79 79 const struct nf_loginfo *li, 80 80 const char *fmt, ...); 81 81 82 + __printf(8, 9) 83 + void nf_log_trace(struct net *net, 84 + u_int8_t pf, 85 + unsigned int hooknum, 86 + const struct sk_buff *skb, 87 + const struct net_device *in, 88 + const struct net_device *out, 89 + const struct nf_loginfo *li, 90 + const char *fmt, ...); 91 + 82 92 struct nf_log_buf; 83 93 84 94 struct nf_log_buf *nf_log_buf_open(void);

+61 -62

include/trace/events/regmap.h

··· 7 7 #include <linux/ktime.h> 8 8 #include <linux/tracepoint.h> 9 9 10 - struct device; 11 - struct regmap; 10 + #include "../../../drivers/base/regmap/internal.h" 12 11 13 12 /* 14 13 * Log register events 15 14 */ 16 15 DECLARE_EVENT_CLASS(regmap_reg, 17 16 18 - TP_PROTO(struct device *dev, unsigned int reg, 17 + TP_PROTO(struct regmap *map, unsigned int reg, 19 18 unsigned int val), 20 19 21 - TP_ARGS(dev, reg, val), 20 + TP_ARGS(map, reg, val), 22 21 23 22 TP_STRUCT__entry( 24 - __string( name, dev_name(dev) ) 25 - __field( unsigned int, reg ) 26 - __field( unsigned int, val ) 23 + __string( name, regmap_name(map) ) 24 + __field( unsigned int, reg ) 25 + __field( unsigned int, val ) 27 26 ), 28 27 29 28 TP_fast_assign( 30 - __assign_str(name, dev_name(dev)); 29 + __assign_str(name, regmap_name(map)); 31 30 __entry->reg = reg; 32 31 __entry->val = val; 33 32 ), ··· 38 39 39 40 DEFINE_EVENT(regmap_reg, regmap_reg_write, 40 41 41 - TP_PROTO(struct device *dev, unsigned int reg, 42 + TP_PROTO(struct regmap *map, unsigned int reg, 42 43 unsigned int val), 43 44 44 - TP_ARGS(dev, reg, val) 45 + TP_ARGS(map, reg, val) 45 46 46 47 ); 47 48 48 49 DEFINE_EVENT(regmap_reg, regmap_reg_read, 49 50 50 - TP_PROTO(struct device *dev, unsigned int reg, 51 + TP_PROTO(struct regmap *map, unsigned int reg, 51 52 unsigned int val), 52 53 53 - TP_ARGS(dev, reg, val) 54 + TP_ARGS(map, reg, val) 54 55 55 56 ); 56 57 57 58 DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, 58 59 59 - TP_PROTO(struct device *dev, unsigned int reg, 60 + TP_PROTO(struct regmap *map, unsigned int reg, 60 61 unsigned int val), 61 62 62 - TP_ARGS(dev, reg, val) 63 + TP_ARGS(map, reg, val) 63 64 64 65 ); 65 66 66 67 DECLARE_EVENT_CLASS(regmap_block, 67 68 68 - TP_PROTO(struct device *dev, unsigned int reg, int count), 69 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 69 70 70 - TP_ARGS(dev, reg, count), 71 + TP_ARGS(map, reg, count), 71 72 72 73 TP_STRUCT__entry( 73 - __string( name, 
dev_name(dev) ) 74 - __field( unsigned int, reg ) 75 - __field( int, count ) 74 + __string( name, regmap_name(map) ) 75 + __field( unsigned int, reg ) 76 + __field( int, count ) 76 77 ), 77 78 78 79 TP_fast_assign( 79 - __assign_str(name, dev_name(dev)); 80 + __assign_str(name, regmap_name(map)); 80 81 __entry->reg = reg; 81 82 __entry->count = count; 82 83 ), ··· 88 89 89 90 DEFINE_EVENT(regmap_block, regmap_hw_read_start, 90 91 91 - TP_PROTO(struct device *dev, unsigned int reg, int count), 92 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 92 93 93 - TP_ARGS(dev, reg, count) 94 + TP_ARGS(map, reg, count) 94 95 ); 95 96 96 97 DEFINE_EVENT(regmap_block, regmap_hw_read_done, 97 98 98 - TP_PROTO(struct device *dev, unsigned int reg, int count), 99 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 99 100 100 - TP_ARGS(dev, reg, count) 101 + TP_ARGS(map, reg, count) 101 102 ); 102 103 103 104 DEFINE_EVENT(regmap_block, regmap_hw_write_start, 104 105 105 - TP_PROTO(struct device *dev, unsigned int reg, int count), 106 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 106 107 107 - TP_ARGS(dev, reg, count) 108 + TP_ARGS(map, reg, count) 108 109 ); 109 110 110 111 DEFINE_EVENT(regmap_block, regmap_hw_write_done, 111 112 112 - TP_PROTO(struct device *dev, unsigned int reg, int count), 113 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 113 114 114 - TP_ARGS(dev, reg, count) 115 + TP_ARGS(map, reg, count) 115 116 ); 116 117 117 118 TRACE_EVENT(regcache_sync, 118 119 119 - TP_PROTO(struct device *dev, const char *type, 120 + TP_PROTO(struct regmap *map, const char *type, 120 121 const char *status), 121 122 122 - TP_ARGS(dev, type, status), 123 + TP_ARGS(map, type, status), 123 124 124 125 TP_STRUCT__entry( 125 - __string( name, dev_name(dev) ) 126 - __string( status, status ) 127 - __string( type, type ) 128 - __field( int, type ) 126 + __string( name, regmap_name(map) ) 127 + __string( status, status ) 128 + __string( type, 
type ) 129 + __field( int, type ) 129 130 ), 130 131 131 132 TP_fast_assign( 132 - __assign_str(name, dev_name(dev)); 133 + __assign_str(name, regmap_name(map)); 133 134 __assign_str(status, status); 134 135 __assign_str(type, type); 135 136 ), ··· 140 141 141 142 DECLARE_EVENT_CLASS(regmap_bool, 142 143 143 - TP_PROTO(struct device *dev, bool flag), 144 + TP_PROTO(struct regmap *map, bool flag), 144 145 145 - TP_ARGS(dev, flag), 146 + TP_ARGS(map, flag), 146 147 147 148 TP_STRUCT__entry( 148 - __string( name, dev_name(dev) ) 149 - __field( int, flag ) 149 + __string( name, regmap_name(map) ) 150 + __field( int, flag ) 150 151 ), 151 152 152 153 TP_fast_assign( 153 - __assign_str(name, dev_name(dev)); 154 + __assign_str(name, regmap_name(map)); 154 155 __entry->flag = flag; 155 156 ), 156 157 ··· 160 161 161 162 DEFINE_EVENT(regmap_bool, regmap_cache_only, 162 163 163 - TP_PROTO(struct device *dev, bool flag), 164 + TP_PROTO(struct regmap *map, bool flag), 164 165 165 - TP_ARGS(dev, flag) 166 + TP_ARGS(map, flag) 166 167 167 168 ); 168 169 169 170 DEFINE_EVENT(regmap_bool, regmap_cache_bypass, 170 171 171 - TP_PROTO(struct device *dev, bool flag), 172 + TP_PROTO(struct regmap *map, bool flag), 172 173 173 - TP_ARGS(dev, flag) 174 + TP_ARGS(map, flag) 174 175 175 176 ); 176 177 177 178 DECLARE_EVENT_CLASS(regmap_async, 178 179 179 - TP_PROTO(struct device *dev), 180 + TP_PROTO(struct regmap *map), 180 181 181 - TP_ARGS(dev), 182 + TP_ARGS(map), 182 183 183 184 TP_STRUCT__entry( 184 - __string( name, dev_name(dev) ) 185 + __string( name, regmap_name(map) ) 185 186 ), 186 187 187 188 TP_fast_assign( 188 - __assign_str(name, dev_name(dev)); 189 + __assign_str(name, regmap_name(map)); 189 190 ), 190 191 191 192 TP_printk("%s", __get_str(name)) ··· 193 194 194 195 DEFINE_EVENT(regmap_block, regmap_async_write_start, 195 196 196 - TP_PROTO(struct device *dev, unsigned int reg, int count), 197 + TP_PROTO(struct regmap *map, unsigned int reg, int count), 197 198 198 - 
TP_ARGS(dev, reg, count) 199 + TP_ARGS(map, reg, count) 199 200 ); 200 201 201 202 DEFINE_EVENT(regmap_async, regmap_async_io_complete, 202 203 203 - TP_PROTO(struct device *dev), 204 + TP_PROTO(struct regmap *map), 204 205 205 - TP_ARGS(dev) 206 + TP_ARGS(map) 206 207 207 208 ); 208 209 209 210 DEFINE_EVENT(regmap_async, regmap_async_complete_start, 210 211 211 - TP_PROTO(struct device *dev), 212 + TP_PROTO(struct regmap *map), 212 213 213 - TP_ARGS(dev) 214 + TP_ARGS(map) 214 215 215 216 ); 216 217 217 218 DEFINE_EVENT(regmap_async, regmap_async_complete_done, 218 219 219 - TP_PROTO(struct device *dev), 220 + TP_PROTO(struct regmap *map), 220 221 221 - TP_ARGS(dev) 222 + TP_ARGS(map) 222 223 223 224 ); 224 225 225 226 TRACE_EVENT(regcache_drop_region, 226 227 227 - TP_PROTO(struct device *dev, unsigned int from, 228 + TP_PROTO(struct regmap *map, unsigned int from, 228 229 unsigned int to), 229 230 230 - TP_ARGS(dev, from, to), 231 + TP_ARGS(map, from, to), 231 232 232 233 TP_STRUCT__entry( 233 - __string( name, dev_name(dev) ) 234 - __field( unsigned int, from ) 235 - __field( unsigned int, to ) 234 + __string( name, regmap_name(map) ) 235 + __field( unsigned int, from ) 236 + __field( unsigned int, to ) 236 237 ), 237 238 238 239 TP_fast_assign( 239 - __assign_str(name, dev_name(dev)); 240 + __assign_str(name, regmap_name(map)); 240 241 __entry->from = from; 241 242 __entry->to = to; 242 243 ),

+3 -3

include/uapi/linux/perf_event.h

··· 326 326 exclude_callchain_user : 1, /* exclude user callchains */ 327 327 mmap2 : 1, /* include mmap with inode data */ 328 328 comm_exec : 1, /* flag comm events that are due to an exec */ 329 - __reserved_1 : 39; 329 + use_clockid : 1, /* use @clockid for time fields */ 330 + __reserved_1 : 38; 330 331 331 332 union { 332 333 __u32 wakeup_events; /* wakeup every n events */ ··· 356 355 */ 357 356 __u32 sample_stack_user; 358 357 359 - /* Align to u64. */ 360 - __u32 __reserved_2; 358 + __s32 clockid; 361 359 /* 362 360 * Defines set of regs to dump for each sample 363 361 * state captured on:

+74 -3

kernel/events/core.c

··· 327 327 return local_clock(); 328 328 } 329 329 330 + static inline u64 perf_event_clock(struct perf_event *event) 331 + { 332 + return event->clock(); 333 + } 334 + 330 335 static inline struct perf_cpu_context * 331 336 __get_cpu_context(struct perf_event_context *ctx) 332 337 { ··· 4767 4762 } 4768 4763 4769 4764 if (sample_type & PERF_SAMPLE_TIME) 4770 - data->time = perf_clock(); 4765 + data->time = perf_event_clock(event); 4771 4766 4772 4767 if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) 4773 4768 data->id = primary_event_id(event); ··· 5345 5340 task_event->event_id.tid = perf_event_tid(event, task); 5346 5341 task_event->event_id.ptid = perf_event_tid(event, current); 5347 5342 5343 + task_event->event_id.time = perf_event_clock(event); 5344 + 5348 5345 perf_output_put(&handle, task_event->event_id); 5349 5346 5350 5347 perf_event__output_id_sample(event, &handle, &sample); ··· 5380 5373 /* .ppid */ 5381 5374 /* .tid */ 5382 5375 /* .ptid */ 5383 - .time = perf_clock(), 5376 + /* .time */ 5384 5377 }, 5385 5378 }; 5386 5379 ··· 5756 5749 .misc = 0, 5757 5750 .size = sizeof(throttle_event), 5758 5751 }, 5759 - .time = perf_clock(), 5752 + .time = perf_event_clock(event), 5760 5753 .id = primary_event_id(event), 5761 5754 .stream_id = event->id, 5762 5755 }; ··· 6300 6293 static struct pmu perf_swevent = { 6301 6294 .task_ctx_nr = perf_sw_context, 6302 6295 6296 + .capabilities = PERF_PMU_CAP_NO_NMI, 6297 + 6303 6298 .event_init = perf_swevent_init, 6304 6299 .add = perf_swevent_add, 6305 6300 .del = perf_swevent_del, ··· 6645 6636 static struct pmu perf_cpu_clock = { 6646 6637 .task_ctx_nr = perf_sw_context, 6647 6638 6639 + .capabilities = PERF_PMU_CAP_NO_NMI, 6640 + 6648 6641 .event_init = cpu_clock_event_init, 6649 6642 .add = cpu_clock_event_add, 6650 6643 .del = cpu_clock_event_del, ··· 6725 6714 6726 6715 static struct pmu perf_task_clock = { 6727 6716 .task_ctx_nr = perf_sw_context, 6717 + 6718 + .capabilities = PERF_PMU_CAP_NO_NMI, 
6728 6719 6729 6720 .event_init = task_clock_event_init, 6730 6721 .add = task_clock_event_add, ··· 7213 7200 event->hw.target = task; 7214 7201 } 7215 7202 7203 + event->clock = &local_clock; 7204 + if (parent_event) 7205 + event->clock = parent_event->clock; 7206 + 7216 7207 if (!overflow_handler && parent_event) { 7217 7208 overflow_handler = parent_event->overflow_handler; 7218 7209 context = parent_event->overflow_handler_context; ··· 7439 7422 if (output_event->cpu == -1 && output_event->ctx != event->ctx) 7440 7423 goto out; 7441 7424 7425 + /* 7426 + * Mixing clocks in the same buffer is trouble you don't need. 7427 + */ 7428 + if (output_event->clock != event->clock) 7429 + goto out; 7430 + 7442 7431 set: 7443 7432 mutex_lock(&event->mmap_mutex); 7444 7433 /* Can't redirect output if we've got an active mmap() */ ··· 7475 7452 7476 7453 mutex_lock(a); 7477 7454 mutex_lock_nested(b, SINGLE_DEPTH_NESTING); 7455 + } 7456 + 7457 + static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) 7458 + { 7459 + bool nmi_safe = false; 7460 + 7461 + switch (clk_id) { 7462 + case CLOCK_MONOTONIC: 7463 + event->clock = &ktime_get_mono_fast_ns; 7464 + nmi_safe = true; 7465 + break; 7466 + 7467 + case CLOCK_MONOTONIC_RAW: 7468 + event->clock = &ktime_get_raw_fast_ns; 7469 + nmi_safe = true; 7470 + break; 7471 + 7472 + case CLOCK_REALTIME: 7473 + event->clock = &ktime_get_real_ns; 7474 + break; 7475 + 7476 + case CLOCK_BOOTTIME: 7477 + event->clock = &ktime_get_boot_ns; 7478 + break; 7479 + 7480 + case CLOCK_TAI: 7481 + event->clock = &ktime_get_tai_ns; 7482 + break; 7483 + 7484 + default: 7485 + return -EINVAL; 7486 + } 7487 + 7488 + if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) 7489 + return -EINVAL; 7490 + 7491 + return 0; 7478 7492 } 7479 7493 7480 7494 /** ··· 7629 7569 */ 7630 7570 pmu = event->pmu; 7631 7571 7572 + if (attr.use_clockid) { 7573 + err = perf_event_set_clock(event, attr.clockid); 7574 + if (err) 7575 + goto 
err_alloc; 7576 + } 7577 + 7632 7578 if (group_leader && 7633 7579 (is_software_event(event) != is_software_event(group_leader))) { 7634 7580 if (is_software_event(event)) { ··· 7684 7618 */ 7685 7619 if (group_leader->group_leader != group_leader) 7686 7620 goto err_context; 7621 + 7622 + /* All events in a group should have the same clock */ 7623 + if (group_leader->clock != event->clock) 7624 + goto err_context; 7625 + 7687 7626 /* 7688 7627 * Do not allow to attach to a group in a different 7689 7628 * task or CPU context:

+6 -2

kernel/sched/fair.c

··· 1609 1609 /* 1610 1610 * If there were no record hinting faults then either the task is 1611 1611 * completely idle or all activity is areas that are not of interest 1612 - * to automatic numa balancing. Scan slower 1612 + * to automatic numa balancing. Related to that, if there were failed 1613 + * migration then it implies we are migrating too quickly or the local 1614 + * node is overloaded. In either case, scan slower 1613 1615 */ 1614 - if (local + shared == 0) { 1616 + if (local + shared == 0 || p->numa_faults_locality[2]) { 1615 1617 p->numa_scan_period = min(p->numa_scan_period_max, 1616 1618 p->numa_scan_period << 1); 1617 1619 ··· 2082 2080 2083 2081 if (migrated) 2084 2082 p->numa_pages_migrated += pages; 2083 + if (flags & TNF_MIGRATE_FAIL) 2084 + p->numa_faults_locality[2] += pages; 2085 2085 2086 2086 p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; 2087 2087 p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;

+86 -2

kernel/time/clockevents.c

··· 94 94 } 95 95 EXPORT_SYMBOL_GPL(clockevent_delta2ns); 96 96 97 + static int __clockevents_set_mode(struct clock_event_device *dev, 98 + enum clock_event_mode mode) 99 + { 100 + /* Transition with legacy set_mode() callback */ 101 + if (dev->set_mode) { 102 + /* Legacy callback doesn't support new modes */ 103 + if (mode > CLOCK_EVT_MODE_RESUME) 104 + return -ENOSYS; 105 + dev->set_mode(mode, dev); 106 + return 0; 107 + } 108 + 109 + if (dev->features & CLOCK_EVT_FEAT_DUMMY) 110 + return 0; 111 + 112 + /* Transition with new mode-specific callbacks */ 113 + switch (mode) { 114 + case CLOCK_EVT_MODE_UNUSED: 115 + /* 116 + * This is an internal state, which is guaranteed to go from 117 + * SHUTDOWN to UNUSED. No driver interaction required. 118 + */ 119 + return 0; 120 + 121 + case CLOCK_EVT_MODE_SHUTDOWN: 122 + return dev->set_mode_shutdown(dev); 123 + 124 + case CLOCK_EVT_MODE_PERIODIC: 125 + /* Core internal bug */ 126 + if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) 127 + return -ENOSYS; 128 + return dev->set_mode_periodic(dev); 129 + 130 + case CLOCK_EVT_MODE_ONESHOT: 131 + /* Core internal bug */ 132 + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) 133 + return -ENOSYS; 134 + return dev->set_mode_oneshot(dev); 135 + 136 + case CLOCK_EVT_MODE_RESUME: 137 + /* Optional callback */ 138 + if (dev->set_mode_resume) 139 + return dev->set_mode_resume(dev); 140 + else 141 + return 0; 142 + 143 + default: 144 + return -ENOSYS; 145 + } 146 + } 147 + 97 148 /** 98 149 * clockevents_set_mode - set the operating mode of a clock event device 99 150 * @dev: device to modify ··· 156 105 enum clock_event_mode mode) 157 106 { 158 107 if (dev->mode != mode) { 159 - dev->set_mode(mode, dev); 108 + if (__clockevents_set_mode(dev, mode)) 109 + return; 110 + 160 111 dev->mode = mode; 161 112 162 113 /* ··· 426 373 } 427 374 EXPORT_SYMBOL_GPL(clockevents_unbind); 428 375 376 + /* Sanity check of mode transition callbacks */ 377 + static int clockevents_sanity_check(struct 
clock_event_device *dev) 378 + { 379 + /* Legacy set_mode() callback */ 380 + if (dev->set_mode) { 381 + /* We shouldn't be supporting new modes now */ 382 + WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || 383 + dev->set_mode_shutdown || dev->set_mode_resume); 384 + return 0; 385 + } 386 + 387 + if (dev->features & CLOCK_EVT_FEAT_DUMMY) 388 + return 0; 389 + 390 + /* New mode-specific callbacks */ 391 + if (!dev->set_mode_shutdown) 392 + return -EINVAL; 393 + 394 + if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && 395 + !dev->set_mode_periodic) 396 + return -EINVAL; 397 + 398 + if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && 399 + !dev->set_mode_oneshot) 400 + return -EINVAL; 401 + 402 + return 0; 403 + } 404 + 429 405 /** 430 406 * clockevents_register_device - register a clock event device 431 407 * @dev: device to register ··· 464 382 unsigned long flags; 465 383 466 384 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 385 + BUG_ON(clockevents_sanity_check(dev)); 386 + 467 387 if (!dev->cpumask) { 468 388 WARN_ON(num_possible_cpus() > 1); 469 389 dev->cpumask = cpumask_of(smp_processor_id()); ··· 533 449 return clockevents_program_event(dev, dev->next_event, false); 534 450 535 451 if (dev->mode == CLOCK_EVT_MODE_PERIODIC) 536 - dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); 452 + return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); 537 453 538 454 return 0; 539 455 }

+76 -92

kernel/time/clocksource.c

··· 142 142 schedule_work(&watchdog_work); 143 143 } 144 144 145 - static void clocksource_unstable(struct clocksource *cs, int64_t delta) 146 - { 147 - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", 148 - cs->name, delta); 149 - __clocksource_unstable(cs); 150 - } 151 - 152 145 /** 153 146 * clocksource_mark_unstable - mark clocksource unstable via watchdog 154 147 * @cs: clocksource to be marked unstable ··· 167 174 static void clocksource_watchdog(unsigned long data) 168 175 { 169 176 struct clocksource *cs; 170 - cycle_t csnow, wdnow, delta; 177 + cycle_t csnow, wdnow, cslast, wdlast, delta; 171 178 int64_t wd_nsec, cs_nsec; 172 179 int next_cpu, reset_pending; 173 180 ··· 206 213 207 214 delta = clocksource_delta(csnow, cs->cs_last, cs->mask); 208 215 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); 216 + wdlast = cs->wd_last; /* save these in case we print them */ 217 + cslast = cs->cs_last; 209 218 cs->cs_last = csnow; 210 219 cs->wd_last = wdnow; 211 220 ··· 216 221 217 222 /* Check the deviation from the watchdog clocksource. 
*/ 218 223 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 219 - clocksource_unstable(cs, cs_nsec - wd_nsec); 224 + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); 225 + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", 226 + watchdog->name, wdnow, wdlast, watchdog->mask); 227 + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", 228 + cs->name, csnow, cslast, cs->mask); 229 + __clocksource_unstable(cs); 220 230 continue; 221 231 } 222 232 ··· 469 469 * @shift: cycle to nanosecond divisor (power of two) 470 470 * @maxadj: maximum adjustment value to mult (~11%) 471 471 * @mask: bitmask for two's complement subtraction of non 64 bit counters 472 + * @max_cyc: maximum cycle value before potential overflow (does not include 473 + * any safety margin) 474 + * 475 + * NOTE: This function includes a safety margin of 50%, so that bad clock values 476 + * can be detected. 472 477 */ 473 - u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) 478 + u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) 474 479 { 475 480 u64 max_nsecs, max_cycles; 476 481 477 482 /* 478 483 * Calculate the maximum number of cycles that we can pass to the 479 - * cyc2ns function without overflowing a 64-bit signed result. The 480 - * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj) 481 - * which is equivalent to the below. 482 - * max_cycles < (2^63)/(mult + maxadj) 483 - * max_cycles < 2^(log2((2^63)/(mult + maxadj))) 484 - * max_cycles < 2^(log2(2^63) - log2(mult + maxadj)) 485 - * max_cycles < 2^(63 - log2(mult + maxadj)) 486 - * max_cycles < 1 << (63 - log2(mult + maxadj)) 487 - * Please note that we add 1 to the result of the log2 to account for 488 - * any rounding errors, ensure the above inequality is satisfied and 489 - * no overflow will occur. 484 + * cyc2ns() function without overflowing a 64-bit result. 
490 485 */ 491 - max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); 486 + max_cycles = ULLONG_MAX; 487 + do_div(max_cycles, mult+maxadj); 492 488 493 489 /* 494 490 * The actual maximum number of cycles we can defer the clocksource is ··· 495 499 max_cycles = min(max_cycles, mask); 496 500 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); 497 501 502 + /* return the max_cycles value as well if requested */ 503 + if (max_cyc) 504 + *max_cyc = max_cycles; 505 + 506 + /* Return 50% of the actual maximum, so we can detect bad values */ 507 + max_nsecs >>= 1; 508 + 498 509 return max_nsecs; 499 510 } 500 511 501 512 /** 502 - * clocksource_max_deferment - Returns max time the clocksource can be deferred 503 - * @cs: Pointer to clocksource 513 + * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles 514 + * @cs: Pointer to clocksource to be updated 504 515 * 505 516 */ 506 - static u64 clocksource_max_deferment(struct clocksource *cs) 517 + static inline void clocksource_update_max_deferment(struct clocksource *cs) 507 518 { 508 - u64 max_nsecs; 509 - 510 - max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, 511 - cs->mask); 512 - /* 513 - * To ensure that the clocksource does not wrap whilst we are idle, 514 - * limit the time the clocksource can be deferred by 12.5%. Please 515 - * note a margin of 12.5% is used because this can be computed with 516 - * a shift, versus say 10% which would require division. 
517 - */ 518 - return max_nsecs - (max_nsecs >> 3); 519 + cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, 520 + cs->maxadj, cs->mask, 521 + &cs->max_cycles); 519 522 } 520 523 521 524 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET ··· 643 648 } 644 649 645 650 /** 646 - * __clocksource_updatefreq_scale - Used update clocksource with new freq 651 + * __clocksource_update_freq_scale - Used update clocksource with new freq 647 652 * @cs: clocksource to be registered 648 653 * @scale: Scale factor multiplied against freq to get clocksource hz 649 654 * @freq: clocksource frequency (cycles per second) divided by scale ··· 651 656 * This should only be called from the clocksource->enable() method. 652 657 * 653 658 * This *SHOULD NOT* be called directly! Please use the 654 - * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. 659 + * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper 660 + * functions. 655 661 */ 656 - void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) 662 + void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq) 657 663 { 658 664 u64 sec; 665 + 659 666 /* 660 - * Calc the maximum number of seconds which we can run before 661 - * wrapping around. For clocksources which have a mask > 32bit 662 - * we need to limit the max sleep time to have a good 663 - * conversion precision. 10 minutes is still a reasonable 664 - * amount. That results in a shift value of 24 for a 665 - * clocksource with mask >= 40bit and f >= 4GHz. That maps to 666 - * ~ 0.06ppm granularity for NTP. We apply the same 12.5% 667 - * margin as we do in clocksource_max_deferment() 667 + * Default clocksources are *special* and self-define their mult/shift. 668 + * But, you're not special, so you should specify a freq value. 
668 669 */ 669 - sec = (cs->mask - (cs->mask >> 3)); 670 - do_div(sec, freq); 671 - do_div(sec, scale); 672 - if (!sec) 673 - sec = 1; 674 - else if (sec > 600 && cs->mask > UINT_MAX) 675 - sec = 600; 670 + if (freq) { 671 + /* 672 + * Calc the maximum number of seconds which we can run before 673 + * wrapping around. For clocksources which have a mask > 32-bit 674 + * we need to limit the max sleep time to have a good 675 + * conversion precision. 10 minutes is still a reasonable 676 + * amount. That results in a shift value of 24 for a 677 + * clocksource with mask >= 40-bit and f >= 4GHz. That maps to 678 + * ~ 0.06ppm granularity for NTP. 679 + */ 680 + sec = cs->mask; 681 + do_div(sec, freq); 682 + do_div(sec, scale); 683 + if (!sec) 684 + sec = 1; 685 + else if (sec > 600 && cs->mask > UINT_MAX) 686 + sec = 600; 676 687 677 - clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, 678 - NSEC_PER_SEC / scale, sec * scale); 679 - 688 + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, 689 + NSEC_PER_SEC / scale, sec * scale); 690 + } 680 691 /* 681 - * for clocksources that have large mults, to avoid overflow. 682 - * Since mult may be adjusted by ntp, add an safety extra margin 683 - * 692 + * Ensure clocksources that have large 'mult' values don't overflow 693 + * when adjusted. 684 694 */ 685 695 cs->maxadj = clocksource_max_adjustment(cs); 686 - while ((cs->mult + cs->maxadj < cs->mult) 687 - || (cs->mult - cs->maxadj > cs->mult)) { 696 + while (freq && ((cs->mult + cs->maxadj < cs->mult) 697 + || (cs->mult - cs->maxadj > cs->mult))) { 688 698 cs->mult >>= 1; 689 699 cs->shift--; 690 700 cs->maxadj = clocksource_max_adjustment(cs); 691 701 } 692 702 693 - cs->max_idle_ns = clocksource_max_deferment(cs); 703 + /* 704 + * Only warn for *special* clocksources that self-define 705 + * their mult/shift values and don't specify a freq. 
706 + */ 707 + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, 708 + "timekeeping: Clocksource %s might overflow on 11%% adjustment\n", 709 + cs->name); 710 + 711 + clocksource_update_max_deferment(cs); 712 + 713 + pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", 714 + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); 694 715 } 695 - EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); 716 + EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); 696 717 697 718 /** 698 719 * __clocksource_register_scale - Used to install new clocksources ··· 725 714 { 726 715 727 716 /* Initialize mult/shift and max_idle_ns */ 728 - __clocksource_updatefreq_scale(cs, scale, freq); 717 + __clocksource_update_freq_scale(cs, scale, freq); 729 718 730 719 /* Add clocksource to the clocksource list */ 731 720 mutex_lock(&clocksource_mutex); ··· 736 725 return 0; 737 726 } 738 727 EXPORT_SYMBOL_GPL(__clocksource_register_scale); 739 - 740 - 741 - /** 742 - * clocksource_register - Used to install new clocksources 743 - * @cs: clocksource to be registered 744 - * 745 - * Returns -EBUSY if registration fails, zero otherwise. 746 - */ 747 - int clocksource_register(struct clocksource *cs) 748 - { 749 - /* calculate max adjustment for given mult/shift */ 750 - cs->maxadj = clocksource_max_adjustment(cs); 751 - WARN_ONCE(cs->mult + cs->maxadj < cs->mult, 752 - "Clocksource %s might overflow on 11%% adjustment\n", 753 - cs->name); 754 - 755 - /* calculate max idle time permitted for this clocksource */ 756 - cs->max_idle_ns = clocksource_max_deferment(cs); 757 - 758 - mutex_lock(&clocksource_mutex); 759 - clocksource_enqueue(cs); 760 - clocksource_enqueue_watchdog(cs); 761 - clocksource_select(); 762 - mutex_unlock(&clocksource_mutex); 763 - return 0; 764 - } 765 - EXPORT_SYMBOL(clocksource_register); 766 728 767 729 static void __clocksource_change_rating(struct clocksource *cs, int rating) 768 730 {

+3 -2

kernel/time/jiffies.c

··· 71 71 .mask = 0xffffffff, /*32bits*/ 72 72 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ 73 73 .shift = JIFFIES_SHIFT, 74 + .max_cycles = 10, 74 75 }; 75 76 76 77 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); ··· 95 94 96 95 static int __init init_jiffies_clocksource(void) 97 96 { 98 - return clocksource_register(&clocksource_jiffies); 97 + return __clocksource_register(&clocksource_jiffies); 99 98 } 100 99 101 100 core_initcall(init_jiffies_clocksource); ··· 131 130 132 131 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; 133 132 134 - clocksource_register(&refined_jiffies); 133 + __clocksource_register(&refined_jiffies); 135 134 return 0; 136 135 }

+162 -76

kernel/time/sched_clock.c

··· 1 1 /* 2 - * sched_clock.c: support for extending counters to full 64-bit ns counter 2 + * sched_clock.c: Generic sched_clock() support, to extend low level 3 + * hardware time counters to full 64-bit ns values. 3 4 * 4 5 * This program is free software; you can redistribute it and/or modify 5 6 * it under the terms of the GNU General Public License version 2 as ··· 19 18 #include <linux/seqlock.h> 20 19 #include <linux/bitops.h> 21 20 22 - struct clock_data { 23 - ktime_t wrap_kt; 21 + /** 22 + * struct clock_read_data - data required to read from sched_clock() 23 + * 24 + * @epoch_ns: sched_clock() value at last update 25 + * @epoch_cyc: Clock cycle value at last update. 26 + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit 27 + * clocks. 28 + * @read_sched_clock: Current clock source (or dummy source when suspended). 29 + * @mult: Multipler for scaled math conversion. 30 + * @shift: Shift value for scaled math conversion. 31 + * 32 + * Care must be taken when updating this structure; it is read by 33 + * some very hot code paths. It occupies <=40 bytes and, when combined 34 + * with the seqcount used to synchronize access, comfortably fits into 35 + * a 64 byte cache line. 36 + */ 37 + struct clock_read_data { 24 38 u64 epoch_ns; 25 39 u64 epoch_cyc; 26 - seqcount_t seq; 27 - unsigned long rate; 40 + u64 sched_clock_mask; 41 + u64 (*read_sched_clock)(void); 28 42 u32 mult; 29 43 u32 shift; 30 - bool suspended; 44 + }; 45 + 46 + /** 47 + * struct clock_data - all data needed for sched_clock() (including 48 + * registration of a new clock source) 49 + * 50 + * @seq: Sequence counter for protecting updates. The lowest 51 + * bit is the index for @read_data. 52 + * @read_data: Data required to read from sched_clock. 53 + * @wrap_kt: Duration for which clock can run before wrapping. 54 + * @rate: Tick rate of the registered clock. 55 + * @actual_read_sched_clock: Registered hardware level clock read function. 
56 + * 57 + * The ordering of this structure has been chosen to optimize cache 58 + * performance. In particular 'seq' and 'read_data[0]' (combined) should fit 59 + * into a single 64-byte cache line. 60 + */ 61 + struct clock_data { 62 + seqcount_t seq; 63 + struct clock_read_data read_data[2]; 64 + ktime_t wrap_kt; 65 + unsigned long rate; 66 + 67 + u64 (*actual_read_sched_clock)(void); 31 68 }; 32 69 33 70 static struct hrtimer sched_clock_timer; 34 71 static int irqtime = -1; 35 72 36 73 core_param(irqtime, irqtime, int, 0400); 37 - 38 - static struct clock_data cd = { 39 - .mult = NSEC_PER_SEC / HZ, 40 - }; 41 - 42 - static u64 __read_mostly sched_clock_mask; 43 74 44 75 static u64 notrace jiffy_sched_clock_read(void) 45 76 { ··· 82 49 return (u64)(jiffies - INITIAL_JIFFIES); 83 50 } 84 51 85 - static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; 52 + static struct clock_data cd ____cacheline_aligned = { 53 + .read_data[0] = { .mult = NSEC_PER_SEC / HZ, 54 + .read_sched_clock = jiffy_sched_clock_read, }, 55 + .actual_read_sched_clock = jiffy_sched_clock_read, 56 + }; 86 57 87 58 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) 88 59 { ··· 95 58 96 59 unsigned long long notrace sched_clock(void) 97 60 { 98 - u64 epoch_ns; 99 - u64 epoch_cyc; 100 - u64 cyc; 61 + u64 cyc, res; 101 62 unsigned long seq; 102 - 103 - if (cd.suspended) 104 - return cd.epoch_ns; 63 + struct clock_read_data *rd; 105 64 106 65 do { 107 - seq = raw_read_seqcount_begin(&cd.seq); 108 - epoch_cyc = cd.epoch_cyc; 109 - epoch_ns = cd.epoch_ns; 66 + seq = raw_read_seqcount(&cd.seq); 67 + rd = cd.read_data + (seq & 1); 68 + 69 + cyc = (rd->read_sched_clock() - rd->epoch_cyc) & 70 + rd->sched_clock_mask; 71 + res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); 110 72 } while (read_seqcount_retry(&cd.seq, seq)); 111 73 112 - cyc = read_sched_clock(); 113 - cyc = (cyc - epoch_cyc) & sched_clock_mask; 114 - return epoch_ns + cyc_to_ns(cyc, cd.mult, 
cd.shift); 74 + return res; 115 75 } 116 76 117 77 /* 118 - * Atomically update the sched_clock epoch. 78 + * Updating the data required to read the clock. 79 + * 80 + * sched_clock() will never observe mis-matched data even if called from 81 + * an NMI. We do this by maintaining an odd/even copy of the data and 82 + * steering sched_clock() to one or the other using a sequence counter. 83 + * In order to preserve the data cache profile of sched_clock() as much 84 + * as possible the system reverts back to the even copy when the update 85 + * completes; the odd copy is used *only* during an update. 119 86 */ 120 - static void notrace update_sched_clock(void) 87 + static void update_clock_read_data(struct clock_read_data *rd) 121 88 { 122 - unsigned long flags; 89 + /* update the backup (odd) copy with the new data */ 90 + cd.read_data[1] = *rd; 91 + 92 + /* steer readers towards the odd copy */ 93 + raw_write_seqcount_latch(&cd.seq); 94 + 95 + /* now its safe for us to update the normal (even) copy */ 96 + cd.read_data[0] = *rd; 97 + 98 + /* switch readers back to the even copy */ 99 + raw_write_seqcount_latch(&cd.seq); 100 + } 101 + 102 + /* 103 + * Atomically update the sched_clock() epoch. 
104 + */ 105 + static void update_sched_clock(void) 106 + { 123 107 u64 cyc; 124 108 u64 ns; 109 + struct clock_read_data rd; 125 110 126 - cyc = read_sched_clock(); 127 - ns = cd.epoch_ns + 128 - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, 129 - cd.mult, cd.shift); 111 + rd = cd.read_data[0]; 130 112 131 - raw_local_irq_save(flags); 132 - raw_write_seqcount_begin(&cd.seq); 133 - cd.epoch_ns = ns; 134 - cd.epoch_cyc = cyc; 135 - raw_write_seqcount_end(&cd.seq); 136 - raw_local_irq_restore(flags); 113 + cyc = cd.actual_read_sched_clock(); 114 + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); 115 + 116 + rd.epoch_ns = ns; 117 + rd.epoch_cyc = cyc; 118 + 119 + update_clock_read_data(&rd); 137 120 } 138 121 139 122 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) 140 123 { 141 124 update_sched_clock(); 142 125 hrtimer_forward_now(hrt, cd.wrap_kt); 126 + 143 127 return HRTIMER_RESTART; 144 128 } 145 129 146 - void __init sched_clock_register(u64 (*read)(void), int bits, 147 - unsigned long rate) 130 + void __init 131 + sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) 148 132 { 149 133 u64 res, wrap, new_mask, new_epoch, cyc, ns; 150 134 u32 new_mult, new_shift; 151 - ktime_t new_wrap_kt; 152 135 unsigned long r; 153 136 char r_unit; 137 + struct clock_read_data rd; 154 138 155 139 if (cd.rate > rate) 156 140 return; 157 141 158 142 WARN_ON(!irqs_disabled()); 159 143 160 - /* calculate the mult/shift to convert counter ticks to ns. */ 144 + /* Calculate the mult/shift to convert counter ticks to ns. 
*/ 161 145 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); 162 146 163 147 new_mask = CLOCKSOURCE_MASK(bits); 164 - 165 - /* calculate how many ns until we wrap */ 166 - wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); 167 - new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); 168 - 169 - /* update epoch for new counter and update epoch_ns from old counter*/ 170 - new_epoch = read(); 171 - cyc = read_sched_clock(); 172 - ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, 173 - cd.mult, cd.shift); 174 - 175 - raw_write_seqcount_begin(&cd.seq); 176 - read_sched_clock = read; 177 - sched_clock_mask = new_mask; 178 148 cd.rate = rate; 179 - cd.wrap_kt = new_wrap_kt; 180 - cd.mult = new_mult; 181 - cd.shift = new_shift; 182 - cd.epoch_cyc = new_epoch; 183 - cd.epoch_ns = ns; 184 - raw_write_seqcount_end(&cd.seq); 149 + 150 + /* Calculate how many nanosecs until we risk wrapping */ 151 + wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); 152 + cd.wrap_kt = ns_to_ktime(wrap); 153 + 154 + rd = cd.read_data[0]; 155 + 156 + /* Update epoch for new counter and update 'epoch_ns' from old counter*/ 157 + new_epoch = read(); 158 + cyc = cd.actual_read_sched_clock(); 159 + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); 160 + cd.actual_read_sched_clock = read; 161 + 162 + rd.read_sched_clock = read; 163 + rd.sched_clock_mask = new_mask; 164 + rd.mult = new_mult; 165 + rd.shift = new_shift; 166 + rd.epoch_cyc = new_epoch; 167 + rd.epoch_ns = ns; 168 + 169 + update_clock_read_data(&rd); 185 170 186 171 r = rate; 187 172 if (r >= 4000000) { 188 173 r /= 1000000; 189 174 r_unit = 'M'; 190 - } else if (r >= 1000) { 191 - r /= 1000; 192 - r_unit = 'k'; 193 - } else 194 - r_unit = ' '; 175 + } else { 176 + if (r >= 1000) { 177 + r /= 1000; 178 + r_unit = 'k'; 179 + } else { 180 + r_unit = ' '; 181 + } 182 + } 195 183 196 - /* calculate the ns resolution of this counter */ 
184 + /* Calculate the ns resolution of this counter */ 197 185 res = cyc_to_ns(1ULL, new_mult, new_shift); 198 186 199 187 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", 200 188 bits, r, r_unit, res, wrap); 201 189 202 - /* Enable IRQ time accounting if we have a fast enough sched_clock */ 190 + /* Enable IRQ time accounting if we have a fast enough sched_clock() */ 203 191 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 204 192 enable_sched_clock_irqtime(); 205 193 ··· 234 172 void __init sched_clock_postinit(void) 235 173 { 236 174 /* 237 - * If no sched_clock function has been provided at that point, 175 + * If no sched_clock() function has been provided at that point, 238 176 * make it the final one one. 239 177 */ 240 - if (read_sched_clock == jiffy_sched_clock_read) 178 + if (cd.actual_read_sched_clock == jiffy_sched_clock_read) 241 179 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); 242 180 243 181 update_sched_clock(); ··· 251 189 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 252 190 } 253 191 192 + /* 193 + * Clock read function for use when the clock is suspended. 194 + * 195 + * This function makes it appear to sched_clock() as if the clock 196 + * stopped counting at its last update. 197 + * 198 + * This function must only be called from the critical 199 + * section in sched_clock(). It relies on the read_seqcount_retry() 200 + * at the end of the critical section to be sure we observe the 201 + * correct copy of 'epoch_cyc'. 
202 + */ 203 + static u64 notrace suspended_sched_clock_read(void) 204 + { 205 + unsigned long seq = raw_read_seqcount(&cd.seq); 206 + 207 + return cd.read_data[seq & 1].epoch_cyc; 208 + } 209 + 254 210 static int sched_clock_suspend(void) 255 211 { 212 + struct clock_read_data *rd = &cd.read_data[0]; 213 + 256 214 update_sched_clock(); 257 215 hrtimer_cancel(&sched_clock_timer); 258 - cd.suspended = true; 216 + rd->read_sched_clock = suspended_sched_clock_read; 217 + 259 218 return 0; 260 219 } 261 220 262 221 static void sched_clock_resume(void) 263 222 { 264 - cd.epoch_cyc = read_sched_clock(); 223 + struct clock_read_data *rd = &cd.read_data[0]; 224 + 225 + rd->epoch_cyc = cd.actual_read_sched_clock(); 265 226 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 266 - cd.suspended = false; 227 + rd->read_sched_clock = cd.actual_read_sched_clock; 267 228 } 268 229 269 230 static struct syscore_ops sched_clock_ops = { 270 - .suspend = sched_clock_suspend, 271 - .resume = sched_clock_resume, 231 + .suspend = sched_clock_suspend, 232 + .resume = sched_clock_resume, 272 233 }; 273 234 274 235 static int __init sched_clock_syscore_init(void) 275 236 { 276 237 register_syscore_ops(&sched_clock_ops); 238 + 277 239 return 0; 278 240 } 279 241 device_initcall(sched_clock_syscore_init);

+233 -112

kernel/time/timekeeping.c

··· 59 59 }; 60 60 61 61 static struct tk_fast tk_fast_mono ____cacheline_aligned; 62 + static struct tk_fast tk_fast_raw ____cacheline_aligned; 62 63 63 64 /* flag for if timekeeping is suspended */ 64 65 int __read_mostly timekeeping_suspended; ··· 69 68 70 69 static inline void tk_normalize_xtime(struct timekeeper *tk) 71 70 { 72 - while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { 73 - tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; 71 + while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { 72 + tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; 74 73 tk->xtime_sec++; 75 74 } 76 75 } ··· 80 79 struct timespec64 ts; 81 80 82 81 ts.tv_sec = tk->xtime_sec; 83 - ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 82 + ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 84 83 return ts; 85 84 } 86 85 87 86 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) 88 87 { 89 88 tk->xtime_sec = ts->tv_sec; 90 - tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; 89 + tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; 91 90 } 92 91 93 92 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) 94 93 { 95 94 tk->xtime_sec += ts->tv_sec; 96 - tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; 95 + tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; 97 96 tk_normalize_xtime(tk); 98 97 } 99 98 ··· 119 118 tk->offs_boot = ktime_add(tk->offs_boot, delta); 120 119 } 121 120 121 + #ifdef CONFIG_DEBUG_TIMEKEEPING 122 + #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ 123 + /* 124 + * These simple flag variables are managed 125 + * without locks, which is racy, but ok since 126 + * we don't really care about being super 127 + * precise about how many events were seen, 128 + * just that a problem was observed. 
129 + */ 130 + static int timekeeping_underflow_seen; 131 + static int timekeeping_overflow_seen; 132 + 133 + /* last_warning is only modified under the timekeeping lock */ 134 + static long timekeeping_last_warning; 135 + 136 + static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) 137 + { 138 + 139 + cycle_t max_cycles = tk->tkr_mono.clock->max_cycles; 140 + const char *name = tk->tkr_mono.clock->name; 141 + 142 + if (offset > max_cycles) { 143 + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", 144 + offset, name, max_cycles); 145 + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); 146 + } else { 147 + if (offset > (max_cycles >> 1)) { 148 + printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", 149 + offset, name, max_cycles >> 1); 150 + printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); 151 + } 152 + } 153 + 154 + if (timekeeping_underflow_seen) { 155 + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 156 + printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); 157 + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 158 + printk_deferred(" Your kernel is probably still fine.\n"); 159 + timekeeping_last_warning = jiffies; 160 + } 161 + timekeeping_underflow_seen = 0; 162 + } 163 + 164 + if (timekeeping_overflow_seen) { 165 + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 166 + printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); 167 + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 168 + printk_deferred(" Your kernel is probably still fine.\n"); 169 + timekeeping_last_warning = jiffies; 170 + 
} 171 + timekeeping_overflow_seen = 0; 172 + } 173 + } 174 + 175 + static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) 176 + { 177 + cycle_t now, last, mask, max, delta; 178 + unsigned int seq; 179 + 180 + /* 181 + * Since we're called holding a seqlock, the data may shift 182 + * under us while we're doing the calculation. This can cause 183 + * false positives, since we'd note a problem but throw the 184 + * results away. So nest another seqlock here to atomically 185 + * grab the points we are checking with. 186 + */ 187 + do { 188 + seq = read_seqcount_begin(&tk_core.seq); 189 + now = tkr->read(tkr->clock); 190 + last = tkr->cycle_last; 191 + mask = tkr->mask; 192 + max = tkr->clock->max_cycles; 193 + } while (read_seqcount_retry(&tk_core.seq, seq)); 194 + 195 + delta = clocksource_delta(now, last, mask); 196 + 197 + /* 198 + * Try to catch underflows by checking if we are seeing small 199 + * mask-relative negative values. 200 + */ 201 + if (unlikely((~delta & mask) < (mask >> 3))) { 202 + timekeeping_underflow_seen = 1; 203 + delta = 0; 204 + } 205 + 206 + /* Cap delta value to the max_cycles values to avoid mult overflows */ 207 + if (unlikely(delta > max)) { 208 + timekeeping_overflow_seen = 1; 209 + delta = tkr->clock->max_cycles; 210 + } 211 + 212 + return delta; 213 + } 214 + #else 215 + static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) 216 + { 217 + } 218 + static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) 219 + { 220 + cycle_t cycle_now, delta; 221 + 222 + /* read clocksource */ 223 + cycle_now = tkr->read(tkr->clock); 224 + 225 + /* calculate the delta since the last update_wall_time */ 226 + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); 227 + 228 + return delta; 229 + } 230 + #endif 231 + 122 232 /** 123 233 * tk_setup_internals - Set up internals to use clocksource clock. 
124 234 * ··· 247 135 u64 tmp, ntpinterval; 248 136 struct clocksource *old_clock; 249 137 250 - old_clock = tk->tkr.clock; 251 - tk->tkr.clock = clock; 252 - tk->tkr.read = clock->read; 253 - tk->tkr.mask = clock->mask; 254 - tk->tkr.cycle_last = tk->tkr.read(clock); 138 + old_clock = tk->tkr_mono.clock; 139 + tk->tkr_mono.clock = clock; 140 + tk->tkr_mono.read = clock->read; 141 + tk->tkr_mono.mask = clock->mask; 142 + tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); 143 + 144 + tk->tkr_raw.clock = clock; 145 + tk->tkr_raw.read = clock->read; 146 + tk->tkr_raw.mask = clock->mask; 147 + tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; 255 148 256 149 /* Do the ns -> cycle conversion first, using original mult */ 257 150 tmp = NTP_INTERVAL_LENGTH; ··· 280 163 if (old_clock) { 281 164 int shift_change = clock->shift - old_clock->shift; 282 165 if (shift_change < 0) 283 - tk->tkr.xtime_nsec >>= -shift_change; 166 + tk->tkr_mono.xtime_nsec >>= -shift_change; 284 167 else 285 - tk->tkr.xtime_nsec <<= shift_change; 168 + tk->tkr_mono.xtime_nsec <<= shift_change; 286 169 } 287 - tk->tkr.shift = clock->shift; 170 + tk->tkr_raw.xtime_nsec = 0; 171 + 172 + tk->tkr_mono.shift = clock->shift; 173 + tk->tkr_raw.shift = clock->shift; 288 174 289 175 tk->ntp_error = 0; 290 176 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; ··· 298 178 * active clocksource. These value will be adjusted via NTP 299 179 * to counteract clock drifting. 
300 180 */ 301 - tk->tkr.mult = clock->mult; 181 + tk->tkr_mono.mult = clock->mult; 182 + tk->tkr_raw.mult = clock->mult; 302 183 tk->ntp_err_mult = 0; 303 184 } 304 185 ··· 314 193 315 194 static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) 316 195 { 317 - cycle_t cycle_now, delta; 196 + cycle_t delta; 318 197 s64 nsec; 319 198 320 - /* read clocksource: */ 321 - cycle_now = tkr->read(tkr->clock); 322 - 323 - /* calculate the delta since the last update_wall_time: */ 324 - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); 199 + delta = timekeeping_get_delta(tkr); 325 200 326 201 nsec = delta * tkr->mult + tkr->xtime_nsec; 327 202 nsec >>= tkr->shift; 328 - 329 - /* If arch requires, add in get_arch_timeoffset() */ 330 - return nsec + arch_gettimeoffset(); 331 - } 332 - 333 - static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) 334 - { 335 - struct clocksource *clock = tk->tkr.clock; 336 - cycle_t cycle_now, delta; 337 - s64 nsec; 338 - 339 - /* read clocksource: */ 340 - cycle_now = tk->tkr.read(clock); 341 - 342 - /* calculate the delta since the last update_wall_time: */ 343 - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); 344 - 345 - /* convert delta to nanoseconds. */ 346 - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); 347 203 348 204 /* If arch requires, add in get_arch_timeoffset() */ 349 205 return nsec + arch_gettimeoffset(); ··· 365 267 * slightly wrong timestamp (a few nanoseconds). See 366 268 * @ktime_get_mono_fast_ns. 
367 269 */ 368 - static void update_fast_timekeeper(struct tk_read_base *tkr) 270 + static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) 369 271 { 370 - struct tk_read_base *base = tk_fast_mono.base; 272 + struct tk_read_base *base = tkf->base; 371 273 372 274 /* Force readers off to base[1] */ 373 - raw_write_seqcount_latch(&tk_fast_mono.seq); 275 + raw_write_seqcount_latch(&tkf->seq); 374 276 375 277 /* Update base[0] */ 376 278 memcpy(base, tkr, sizeof(*base)); 377 279 378 280 /* Force readers back to base[0] */ 379 - raw_write_seqcount_latch(&tk_fast_mono.seq); 281 + raw_write_seqcount_latch(&tkf->seq); 380 282 381 283 /* Update base[1] */ 382 284 memcpy(base + 1, base, sizeof(*base)); ··· 414 316 * of the following timestamps. Callers need to be aware of that and 415 317 * deal with it. 416 318 */ 417 - u64 notrace ktime_get_mono_fast_ns(void) 319 + static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) 418 320 { 419 321 struct tk_read_base *tkr; 420 322 unsigned int seq; 421 323 u64 now; 422 324 423 325 do { 424 - seq = raw_read_seqcount(&tk_fast_mono.seq); 425 - tkr = tk_fast_mono.base + (seq & 0x01); 426 - now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); 326 + seq = raw_read_seqcount(&tkf->seq); 327 + tkr = tkf->base + (seq & 0x01); 328 + now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); 329 + } while (read_seqcount_retry(&tkf->seq, seq)); 427 330 428 - } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); 429 331 return now; 430 332 } 333 + 334 + u64 ktime_get_mono_fast_ns(void) 335 + { 336 + return __ktime_get_fast_ns(&tk_fast_mono); 337 + } 431 338 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 339 + 340 + u64 ktime_get_raw_fast_ns(void) 341 + { 342 + return __ktime_get_fast_ns(&tk_fast_raw); 343 + } 344 + EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); 432 345 433 346 /* Suspend-time cycles value for halted fast timekeeper. 
*/ 434 347 static cycle_t cycles_at_suspend; ··· 462 353 static void halt_fast_timekeeper(struct timekeeper *tk) 463 354 { 464 355 static struct tk_read_base tkr_dummy; 465 - struct tk_read_base *tkr = &tk->tkr; 356 + struct tk_read_base *tkr = &tk->tkr_mono; 466 357 467 358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 468 359 cycles_at_suspend = tkr->read(tkr->clock); 469 360 tkr_dummy.read = dummy_clock_read; 470 - update_fast_timekeeper(&tkr_dummy); 361 + update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); 362 + 363 + tkr = &tk->tkr_raw; 364 + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 365 + tkr_dummy.read = dummy_clock_read; 366 + update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); 471 367 } 472 368 473 369 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD ··· 483 369 484 370 xt = timespec64_to_timespec(tk_xtime(tk)); 485 371 wm = timespec64_to_timespec(tk->wall_to_monotonic); 486 - update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, 487 - tk->tkr.cycle_last); 372 + update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, 373 + tk->tkr_mono.cycle_last); 488 374 } 489 375 490 376 static inline void old_vsyscall_fixup(struct timekeeper *tk) ··· 501 387 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD 502 388 * users are removed, this can be killed. 
503 389 */ 504 - remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); 505 - tk->tkr.xtime_nsec -= remainder; 506 - tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; 390 + remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); 391 + tk->tkr_mono.xtime_nsec -= remainder; 392 + tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; 507 393 tk->ntp_error += remainder << tk->ntp_error_shift; 508 - tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; 394 + tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; 509 395 } 510 396 #else 511 397 #define old_vsyscall_fixup(tk) ··· 570 456 */ 571 457 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); 572 458 nsec = (u32) tk->wall_to_monotonic.tv_nsec; 573 - tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 459 + tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 574 460 575 461 /* Update the monotonic raw base */ 576 - tk->base_raw = timespec64_to_ktime(tk->raw_time); 462 + tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); 577 463 578 464 /* 579 465 * The sum of the nanoseconds portions of xtime and 580 466 * wall_to_monotonic can be greater/equal one second. Take 581 467 * this into account before updating tk->ktime_sec. 
582 468 */ 583 - nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); 469 + nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 584 470 if (nsec >= NSEC_PER_SEC) 585 471 seconds++; 586 472 tk->ktime_sec = seconds; ··· 603 489 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 604 490 sizeof(tk_core.timekeeper)); 605 491 606 - update_fast_timekeeper(&tk->tkr); 492 + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); 493 + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); 607 494 } 608 495 609 496 /** ··· 616 501 */ 617 502 static void timekeeping_forward_now(struct timekeeper *tk) 618 503 { 619 - struct clocksource *clock = tk->tkr.clock; 504 + struct clocksource *clock = tk->tkr_mono.clock; 620 505 cycle_t cycle_now, delta; 621 506 s64 nsec; 622 507 623 - cycle_now = tk->tkr.read(clock); 624 - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); 625 - tk->tkr.cycle_last = cycle_now; 508 + cycle_now = tk->tkr_mono.read(clock); 509 + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 510 + tk->tkr_mono.cycle_last = cycle_now; 511 + tk->tkr_raw.cycle_last = cycle_now; 626 512 627 - tk->tkr.xtime_nsec += delta * tk->tkr.mult; 513 + tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; 628 514 629 515 /* If arch requires, add in get_arch_timeoffset() */ 630 - tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; 516 + tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; 631 517 632 518 tk_normalize_xtime(tk); 633 519 634 - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); 520 + nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); 635 521 timespec64_add_ns(&tk->raw_time, nsec); 636 522 } 637 523 ··· 653 537 seq = read_seqcount_begin(&tk_core.seq); 654 538 655 539 ts->tv_sec = tk->xtime_sec; 656 - nsecs = timekeeping_get_ns(&tk->tkr); 540 + nsecs = timekeeping_get_ns(&tk->tkr_mono); 657 541 658 542 } while (read_seqcount_retry(&tk_core.seq, seq)); 
659 543 ··· 693 577 694 578 do { 695 579 seq = read_seqcount_begin(&tk_core.seq); 696 - base = tk->tkr.base_mono; 697 - nsecs = timekeeping_get_ns(&tk->tkr); 580 + base = tk->tkr_mono.base; 581 + nsecs = timekeeping_get_ns(&tk->tkr_mono); 698 582 699 583 } while (read_seqcount_retry(&tk_core.seq, seq)); 700 584 ··· 719 603 720 604 do { 721 605 seq = read_seqcount_begin(&tk_core.seq); 722 - base = ktime_add(tk->tkr.base_mono, *offset); 723 - nsecs = timekeeping_get_ns(&tk->tkr); 606 + base = ktime_add(tk->tkr_mono.base, *offset); 607 + nsecs = timekeeping_get_ns(&tk->tkr_mono); 724 608 725 609 } while (read_seqcount_retry(&tk_core.seq, seq)); 726 610 ··· 761 645 762 646 do { 763 647 seq = read_seqcount_begin(&tk_core.seq); 764 - base = tk->base_raw; 765 - nsecs = timekeeping_get_ns_raw(tk); 648 + base = tk->tkr_raw.base; 649 + nsecs = timekeeping_get_ns(&tk->tkr_raw); 766 650 767 651 } while (read_seqcount_retry(&tk_core.seq, seq)); 768 652 ··· 790 674 do { 791 675 seq = read_seqcount_begin(&tk_core.seq); 792 676 ts->tv_sec = tk->xtime_sec; 793 - nsec = timekeeping_get_ns(&tk->tkr); 677 + nsec = timekeeping_get_ns(&tk->tkr_mono); 794 678 tomono = tk->wall_to_monotonic; 795 679 796 680 } while (read_seqcount_retry(&tk_core.seq, seq)); ··· 875 759 ts_real->tv_sec = tk->xtime_sec; 876 760 ts_real->tv_nsec = 0; 877 761 878 - nsecs_raw = timekeeping_get_ns_raw(tk); 879 - nsecs_real = timekeeping_get_ns(&tk->tkr); 762 + nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); 763 + nsecs_real = timekeeping_get_ns(&tk->tkr_mono); 880 764 881 765 } while (read_seqcount_retry(&tk_core.seq, seq)); 882 766 ··· 1059 943 */ 1060 944 if (try_module_get(new->owner)) { 1061 945 if (!new->enable || new->enable(new) == 0) { 1062 - old = tk->tkr.clock; 946 + old = tk->tkr_mono.clock; 1063 947 tk_setup_internals(tk, new); 1064 948 if (old->disable) 1065 949 old->disable(old); ··· 1087 971 { 1088 972 struct timekeeper *tk = &tk_core.timekeeper; 1089 973 1090 - if (tk->tkr.clock == clock) 974 + if 
(tk->tkr_mono.clock == clock) 1091 975 return 0; 1092 976 stop_machine(change_clocksource, clock, NULL); 1093 977 tick_clock_notify(); 1094 - return tk->tkr.clock == clock ? 0 : -1; 978 + return tk->tkr_mono.clock == clock ? 0 : -1; 1095 979 } 1096 980 1097 981 /** ··· 1109 993 1110 994 do { 1111 995 seq = read_seqcount_begin(&tk_core.seq); 1112 - nsecs = timekeeping_get_ns_raw(tk); 996 + nsecs = timekeeping_get_ns(&tk->tkr_raw); 1113 997 ts64 = tk->raw_time; 1114 998 1115 999 } while (read_seqcount_retry(&tk_core.seq, seq)); ··· 1132 1016 do { 1133 1017 seq = read_seqcount_begin(&tk_core.seq); 1134 1018 1135 - ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1019 + ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1136 1020 1137 1021 } while (read_seqcount_retry(&tk_core.seq, seq)); 1138 1022 ··· 1151 1035 do { 1152 1036 seq = read_seqcount_begin(&tk_core.seq); 1153 1037 1154 - ret = tk->tkr.clock->max_idle_ns; 1038 + ret = tk->tkr_mono.clock->max_idle_ns; 1155 1039 1156 1040 } while (read_seqcount_retry(&tk_core.seq, seq)); 1157 1041 ··· 1230 1114 tk_set_xtime(tk, &now); 1231 1115 tk->raw_time.tv_sec = 0; 1232 1116 tk->raw_time.tv_nsec = 0; 1233 - tk->base_raw.tv64 = 0; 1234 1117 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 1235 1118 boot = tk_xtime(tk); 1236 1119 ··· 1315 1200 void timekeeping_resume(void) 1316 1201 { 1317 1202 struct timekeeper *tk = &tk_core.timekeeper; 1318 - struct clocksource *clock = tk->tkr.clock; 1203 + struct clocksource *clock = tk->tkr_mono.clock; 1319 1204 unsigned long flags; 1320 1205 struct timespec64 ts_new, ts_delta; 1321 1206 struct timespec tmp; ··· 1343 1228 * The less preferred source will only be tried if there is no better 1344 1229 * usable source. The rtc part is handled separately in rtc core code. 
1345 1230 */ 1346 - cycle_now = tk->tkr.read(clock); 1231 + cycle_now = tk->tkr_mono.read(clock); 1347 1232 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1348 - cycle_now > tk->tkr.cycle_last) { 1233 + cycle_now > tk->tkr_mono.cycle_last) { 1349 1234 u64 num, max = ULLONG_MAX; 1350 1235 u32 mult = clock->mult; 1351 1236 u32 shift = clock->shift; 1352 1237 s64 nsec = 0; 1353 1238 1354 - cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, 1355 - tk->tkr.mask); 1239 + cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, 1240 + tk->tkr_mono.mask); 1356 1241 1357 1242 /* 1358 1243 * "cycle_delta * mutl" may cause 64 bits overflow, if the ··· 1378 1263 __timekeeping_inject_sleeptime(tk, &ts_delta); 1379 1264 1380 1265 /* Re-base the last cycle value */ 1381 - tk->tkr.cycle_last = cycle_now; 1266 + tk->tkr_mono.cycle_last = cycle_now; 1267 + tk->tkr_raw.cycle_last = cycle_now; 1268 + 1382 1269 tk->ntp_error = 0; 1383 1270 timekeeping_suspended = 0; 1384 1271 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); ··· 1533 1416 * 1534 1417 * XXX - TODO: Doc ntp_error calculation. 
1535 1418 */ 1536 - if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { 1419 + if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { 1537 1420 /* NTP adjustment caused clocksource mult overflow */ 1538 1421 WARN_ON_ONCE(1); 1539 1422 return; 1540 1423 } 1541 1424 1542 - tk->tkr.mult += mult_adj; 1425 + tk->tkr_mono.mult += mult_adj; 1543 1426 tk->xtime_interval += interval; 1544 - tk->tkr.xtime_nsec -= offset; 1427 + tk->tkr_mono.xtime_nsec -= offset; 1545 1428 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1546 1429 } 1547 1430 ··· 1603 1486 tk->ntp_err_mult = 0; 1604 1487 } 1605 1488 1606 - if (unlikely(tk->tkr.clock->maxadj && 1607 - (abs(tk->tkr.mult - tk->tkr.clock->mult) 1608 - > tk->tkr.clock->maxadj))) { 1489 + if (unlikely(tk->tkr_mono.clock->maxadj && 1490 + (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) 1491 + > tk->tkr_mono.clock->maxadj))) { 1609 1492 printk_once(KERN_WARNING 1610 1493 "Adjusting %s more than 11%% (%ld vs %ld)\n", 1611 - tk->tkr.clock->name, (long)tk->tkr.mult, 1612 - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); 1494 + tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, 1495 + (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); 1613 1496 } 1614 1497 1615 1498 /* ··· 1626 1509 * We'll correct this error next time through this function, when 1627 1510 * xtime_nsec is not as small. 
1628 1511 */ 1629 - if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { 1630 - s64 neg = -(s64)tk->tkr.xtime_nsec; 1631 - tk->tkr.xtime_nsec = 0; 1512 + if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { 1513 + s64 neg = -(s64)tk->tkr_mono.xtime_nsec; 1514 + tk->tkr_mono.xtime_nsec = 0; 1632 1515 tk->ntp_error += neg << tk->ntp_error_shift; 1633 1516 } 1634 1517 } ··· 1643 1526 */ 1644 1527 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 1645 1528 { 1646 - u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; 1529 + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; 1647 1530 unsigned int clock_set = 0; 1648 1531 1649 - while (tk->tkr.xtime_nsec >= nsecps) { 1532 + while (tk->tkr_mono.xtime_nsec >= nsecps) { 1650 1533 int leap; 1651 1534 1652 - tk->tkr.xtime_nsec -= nsecps; 1535 + tk->tkr_mono.xtime_nsec -= nsecps; 1653 1536 tk->xtime_sec++; 1654 1537 1655 1538 /* Figure out if its a leap sec and apply if needed */ ··· 1694 1577 1695 1578 /* Accumulate one shifted interval */ 1696 1579 offset -= interval; 1697 - tk->tkr.cycle_last += interval; 1580 + tk->tkr_mono.cycle_last += interval; 1581 + tk->tkr_raw.cycle_last += interval; 1698 1582 1699 - tk->tkr.xtime_nsec += tk->xtime_interval << shift; 1583 + tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; 1700 1584 *clock_set |= accumulate_nsecs_to_secs(tk); 1701 1585 1702 1586 /* Accumulate raw time */ ··· 1740 1622 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1741 1623 offset = real_tk->cycle_interval; 1742 1624 #else 1743 - offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), 1744 - tk->tkr.cycle_last, tk->tkr.mask); 1625 + offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), 1626 + tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 1745 1627 #endif 1746 1628 1747 1629 /* Check if there's really nothing to do */ 1748 1630 if (offset < real_tk->cycle_interval) 1749 1631 goto out; 1632 + 1633 + /* Do some additional sanity checking */ 1634 + timekeeping_check_update(real_tk, offset); 1750 
1635 1751 1636 /* 1752 1637 * With NO_HZ we may have to accumulate many cycle_intervals ··· 1905 1784 do { 1906 1785 seq = read_seqcount_begin(&tk_core.seq); 1907 1786 1908 - base = tk->tkr.base_mono; 1909 - nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; 1787 + base = tk->tkr_mono.base; 1788 + nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; 1910 1789 1911 1790 *offs_real = tk->offs_real; 1912 1791 *offs_boot = tk->offs_boot; ··· 1937 1816 do { 1938 1817 seq = read_seqcount_begin(&tk_core.seq); 1939 1818 1940 - base = tk->tkr.base_mono; 1941 - nsecs = timekeeping_get_ns(&tk->tkr); 1819 + base = tk->tkr_mono.base; 1820 + nsecs = timekeeping_get_ns(&tk->tkr_mono); 1942 1821 1943 1822 *offs_real = tk->offs_real; 1944 1823 *offs_boot = tk->offs_boot;

+29 -3

kernel/time/timer_list.c

··· 228 228 print_name_offset(m, dev->set_next_event); 229 229 SEQ_printf(m, "\n"); 230 230 231 - SEQ_printf(m, " set_mode: "); 232 - print_name_offset(m, dev->set_mode); 233 - SEQ_printf(m, "\n"); 231 + if (dev->set_mode) { 232 + SEQ_printf(m, " set_mode: "); 233 + print_name_offset(m, dev->set_mode); 234 + SEQ_printf(m, "\n"); 235 + } else { 236 + if (dev->set_mode_shutdown) { 237 + SEQ_printf(m, " shutdown: "); 238 + print_name_offset(m, dev->set_mode_shutdown); 239 + SEQ_printf(m, "\n"); 240 + } 241 + 242 + if (dev->set_mode_periodic) { 243 + SEQ_printf(m, " periodic: "); 244 + print_name_offset(m, dev->set_mode_periodic); 245 + SEQ_printf(m, "\n"); 246 + } 247 + 248 + if (dev->set_mode_oneshot) { 249 + SEQ_printf(m, " oneshot: "); 250 + print_name_offset(m, dev->set_mode_oneshot); 251 + SEQ_printf(m, "\n"); 252 + } 253 + 254 + if (dev->set_mode_resume) { 255 + SEQ_printf(m, " resume: "); 256 + print_name_offset(m, dev->set_mode_resume); 257 + SEQ_printf(m, "\n"); 258 + } 259 + } 234 260 235 261 SEQ_printf(m, " event_handler: "); 236 262 print_name_offset(m, dev->event_handler);

+13

lib/Kconfig.debug

··· 865 865 data corruption or a sporadic crash at a later stage once the region 866 866 is examined. The runtime overhead introduced is minimal. 867 867 868 + config DEBUG_TIMEKEEPING 869 + bool "Enable extra timekeeping sanity checking" 870 + help 871 + This option will enable additional timekeeping sanity checks 872 + which may be helpful when diagnosing issues where timekeeping 873 + problems are suspected. 874 + 875 + This may include checks in the timekeeping hotpaths, so this 876 + option may have a (very small) performance impact to some 877 + workloads. 878 + 879 + If unsure, say N. 880 + 868 881 config TIMER_STATS 869 882 bool "Collect kernel timers statistics" 870 883 depends on DEBUG_KERNEL && PROC_FS

+13 -13

mm/huge_memory.c

··· 1260 1260 int target_nid, last_cpupid = -1; 1261 1261 bool page_locked; 1262 1262 bool migrated = false; 1263 + bool was_writable; 1263 1264 int flags = 0; 1264 1265 1265 1266 /* A PROT_NONE fault should not end up here */ ··· 1292 1291 flags |= TNF_FAULT_LOCAL; 1293 1292 } 1294 1293 1295 - /* 1296 - * Avoid grouping on DSO/COW pages in specific and RO pages 1297 - * in general, RO pages shouldn't hurt as much anyway since 1298 - * they can be in shared cache state. 1299 - * 1300 - * FIXME! This checks "pmd_dirty()" as an approximation of 1301 - * "is this a read-only page", since checking "pmd_write()" 1302 - * is even more broken. We haven't actually turned this into 1303 - * a writable page, so pmd_write() will always be false. 1304 - */ 1305 - if (!pmd_dirty(pmd)) 1294 + /* See similar comment in do_numa_page for explanation */ 1295 + if (!(vma->vm_flags & VM_WRITE)) 1306 1296 flags |= TNF_NO_GROUP; 1307 1297 1308 1298 /* ··· 1350 1358 if (migrated) { 1351 1359 flags |= TNF_MIGRATED; 1352 1360 page_nid = target_nid; 1353 - } 1361 + } else 1362 + flags |= TNF_MIGRATE_FAIL; 1354 1363 1355 1364 goto out; 1356 1365 clear_pmdnuma: 1357 1366 BUG_ON(!PageLocked(page)); 1367 + was_writable = pmd_write(pmd); 1358 1368 pmd = pmd_modify(pmd, vma->vm_page_prot); 1369 + pmd = pmd_mkyoung(pmd); 1370 + if (was_writable) 1371 + pmd = pmd_mkwrite(pmd); 1359 1372 set_pmd_at(mm, haddr, pmdp, pmd); 1360 1373 update_mmu_cache_pmd(vma, addr, pmdp); 1361 1374 unlock_page(page); ··· 1484 1487 1485 1488 if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { 1486 1489 pmd_t entry; 1490 + bool preserve_write = prot_numa && pmd_write(*pmd); 1487 1491 ret = 1; 1488 1492 1489 1493 /* ··· 1500 1502 if (!prot_numa || !pmd_protnone(*pmd)) { 1501 1503 entry = pmdp_get_and_clear_notify(mm, addr, pmd); 1502 1504 entry = pmd_modify(entry, newprot); 1505 + if (preserve_write) 1506 + entry = pmd_mkwrite(entry); 1503 1507 ret = HPAGE_PMD_NR; 1504 1508 set_pmd_at(mm, addr, pmd, entry); 1505 - 
BUG_ON(pmd_write(entry)); 1509 + BUG_ON(!preserve_write && pmd_write(entry)); 1506 1510 } 1507 1511 spin_unlock(ptl); 1508 1512 }

+12 -10

mm/memory.c

··· 3035 3035 int last_cpupid; 3036 3036 int target_nid; 3037 3037 bool migrated = false; 3038 + bool was_writable = pte_write(pte); 3038 3039 int flags = 0; 3039 3040 3040 3041 /* A PROT_NONE fault should not end up here */ ··· 3060 3059 /* Make it present again */ 3061 3060 pte = pte_modify(pte, vma->vm_page_prot); 3062 3061 pte = pte_mkyoung(pte); 3062 + if (was_writable) 3063 + pte = pte_mkwrite(pte); 3063 3064 set_pte_at(mm, addr, ptep, pte); 3064 3065 update_mmu_cache(vma, addr, ptep); 3065 3066 ··· 3072 3069 } 3073 3070 3074 3071 /* 3075 - * Avoid grouping on DSO/COW pages in specific and RO pages 3076 - * in general, RO pages shouldn't hurt as much anyway since 3077 - * they can be in shared cache state. 3078 - * 3079 - * FIXME! This checks "pmd_dirty()" as an approximation of 3080 - * "is this a read-only page", since checking "pmd_write()" 3081 - * is even more broken. We haven't actually turned this into 3082 - * a writable page, so pmd_write() will always be false. 3072 + * Avoid grouping on RO pages in general. RO pages shouldn't hurt as 3073 + * much anyway since they can be in shared cache state. This misses 3074 + * the case where a mapping is writable but the process never writes 3075 + * to it but pte_write gets cleared during protection updates and 3076 + * pte_dirty has unpredictable behaviour between PTE scan updates, 3077 + * background writeback, dirty balancing and application behaviour. 3083 3078 */ 3084 - if (!pte_dirty(pte)) 3079 + if (!(vma->vm_flags & VM_WRITE)) 3085 3080 flags |= TNF_NO_GROUP; 3086 3081 3087 3082 /* ··· 3103 3102 if (migrated) { 3104 3103 page_nid = target_nid; 3105 3104 flags |= TNF_MIGRATED; 3106 - } 3105 + } else 3106 + flags |= TNF_MIGRATE_FAIL; 3107 3107 3108 3108 out: 3109 3109 if (page_nid != -1)

+4 -9

mm/memory_hotplug.c

··· 1092 1092 return NULL; 1093 1093 1094 1094 arch_refresh_nodedata(nid, pgdat); 1095 + } else { 1096 + /* Reset the nr_zones and classzone_idx to 0 before reuse */ 1097 + pgdat->nr_zones = 0; 1098 + pgdat->classzone_idx = 0; 1095 1099 } 1096 1100 1097 1101 /* we can use NODE_DATA(nid) from here */ ··· 1981 1977 if (is_vmalloc_addr(zone->wait_table)) 1982 1978 vfree(zone->wait_table); 1983 1979 } 1984 - 1985 - /* 1986 - * Since there is no way to guarentee the address of pgdat/zone is not 1987 - * on stack of any kernel threads or used by other kernel objects 1988 - * without reference counting or other symchronizing method, do not 1989 - * reset node_data and free pgdat here. Just reset it to 0 and reuse 1990 - * the memory when the node is online again. 1991 - */ 1992 - memset(pgdat, 0, sizeof(*pgdat)); 1993 1980 } 1994 1981 EXPORT_SYMBOL(try_offline_node); 1995 1982

+1 -3

mm/mmap.c

··· 774 774 775 775 importer->anon_vma = exporter->anon_vma; 776 776 error = anon_vma_clone(importer, exporter); 777 - if (error) { 778 - importer->anon_vma = NULL; 777 + if (error) 779 778 return error; 780 - } 781 779 } 782 780 } 783 781

+3

mm/mprotect.c

··· 75 75 oldpte = *pte; 76 76 if (pte_present(oldpte)) { 77 77 pte_t ptent; 78 + bool preserve_write = prot_numa && pte_write(oldpte); 78 79 79 80 /* 80 81 * Avoid trapping faults against the zero or KSM ··· 95 94 96 95 ptent = ptep_modify_prot_start(mm, addr, pte); 97 96 ptent = pte_modify(ptent, newprot); 97 + if (preserve_write) 98 + ptent = pte_mkwrite(ptent); 98 99 99 100 /* Avoid taking write faults for known dirty pages */ 100 101 if (dirty_accountable && pte_dirty(ptent) &&

+5 -2

mm/page-writeback.c

··· 857 857 * bw * elapsed + write_bandwidth * (period - elapsed) 858 858 * write_bandwidth = --------------------------------------------------- 859 859 * period 860 + * 861 + * @written may have decreased due to account_page_redirty(). 862 + * Avoid underflowing @bw calculation. 860 863 */ 861 - bw = written - bdi->written_stamp; 864 + bw = written - min(written, bdi->written_stamp); 862 865 bw *= HZ; 863 866 if (unlikely(elapsed > period)) { 864 867 do_div(bw, elapsed); ··· 925 922 unsigned long now) 926 923 { 927 924 static DEFINE_SPINLOCK(dirty_lock); 928 - static unsigned long update_time; 925 + static unsigned long update_time = INITIAL_JIFFIES; 929 926 930 927 /* 931 928 * check locklessly first to optimize away locking for the most time

+1

mm/page_isolation.c

··· 103 103 104 104 if (!is_migrate_isolate_page(buddy)) { 105 105 __isolate_free_page(page, order); 106 + kernel_map_pages(page, (1 << order), 1); 106 107 set_page_refcounted(page); 107 108 isolated_page = page; 108 109 }

+8 -1

mm/pagewalk.c

··· 265 265 vma = vma->vm_next; 266 266 267 267 err = walk_page_test(start, next, walk); 268 - if (err > 0) 268 + if (err > 0) { 269 + /* 270 + * positive return values are purely for 271 + * controlling the pagewalk, so should never 272 + * be passed to the callers. 273 + */ 274 + err = 0; 269 275 continue; 276 + } 270 277 if (err < 0) 271 278 break; 272 279 }

+7

mm/rmap.c

··· 287 287 return 0; 288 288 289 289 enomem_failure: 290 + /* 291 + * dst->anon_vma is dropped here otherwise its degree can be incorrectly 292 + * decremented in unlink_anon_vmas(). 293 + * We can safely do this because callers of anon_vma_clone() don't care 294 + * about dst->anon_vma if anon_vma_clone() failed. 295 + */ 296 + dst->anon_vma = NULL; 290 297 unlink_anon_vmas(dst); 291 298 return -ENOMEM; 292 299 }

+4 -2

mm/slub.c

··· 2449 2449 do { 2450 2450 tid = this_cpu_read(s->cpu_slab->tid); 2451 2451 c = raw_cpu_ptr(s->cpu_slab); 2452 - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); 2452 + } while (IS_ENABLED(CONFIG_PREEMPT) && 2453 + unlikely(tid != READ_ONCE(c->tid))); 2453 2454 2454 2455 /* 2455 2456 * Irqless object alloc/free algorithm used here depends on sequence ··· 2719 2718 do { 2720 2719 tid = this_cpu_read(s->cpu_slab->tid); 2721 2720 c = raw_cpu_ptr(s->cpu_slab); 2722 - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); 2721 + } while (IS_ENABLED(CONFIG_PREEMPT) && 2722 + unlikely(tid != READ_ONCE(c->tid))); 2723 2723 2724 2724 /* Same with comment on barrier() in slab_alloc_node() */ 2725 2725 barrier();

+7

net/compat.c

··· 49 49 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || 50 50 __get_user(kmsg->msg_flags, &umsg->msg_flags)) 51 51 return -EFAULT; 52 + 53 + if (!uaddr) 54 + kmsg->msg_namelen = 0; 55 + 56 + if (kmsg->msg_namelen < 0) 57 + return -EINVAL; 58 + 52 59 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) 53 60 kmsg->msg_namelen = sizeof(struct sockaddr_storage); 54 61 kmsg->msg_control = compat_ptr(tmp3);

+3 -3

net/ipv4/netfilter/ip_tables.c

··· 272 272 &chainname, &comment, &rulenum) != 0) 273 273 break; 274 274 275 - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, 276 - "TRACE: %s:%s:%s:%u ", 277 - tablename, chainname, comment, rulenum); 275 + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, 276 + "TRACE: %s:%s:%s:%u ", 277 + tablename, chainname, comment, rulenum); 278 278 } 279 279 #endif 280 280

+1 -5

net/ipv4/tcp_output.c

··· 2773 2773 } else { 2774 2774 /* Socket is locked, keep trying until memory is available. */ 2775 2775 for (;;) { 2776 - skb = alloc_skb_fclone(MAX_TCP_HEADER, 2777 - sk->sk_allocation); 2776 + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); 2778 2777 if (skb) 2779 2778 break; 2780 2779 yield(); 2781 2780 } 2782 - 2783 - /* Reserve space for headers and prepare control bits. */ 2784 - skb_reserve(skb, MAX_TCP_HEADER); 2785 2781 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2786 2782 tcp_init_nondata_skb(skb, tp->write_seq, 2787 2783 TCPHDR_ACK | TCPHDR_FIN);

+1

net/ipv6/fib6_rules.c

··· 104 104 goto again; 105 105 flp6->saddr = saddr; 106 106 } 107 + err = rt->dst.error; 107 108 goto out; 108 109 } 109 110 again:

+3 -3

net/ipv6/netfilter/ip6_tables.c

··· 298 298 &chainname, &comment, &rulenum) != 0) 299 299 break; 300 300 301 - nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, 302 - "TRACE: %s:%s:%s:%u ", 303 - tablename, chainname, comment, rulenum); 301 + nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, 302 + "TRACE: %s:%s:%s:%u ", 303 + tablename, chainname, comment, rulenum); 304 304 } 305 305 #endif 306 306

+3 -5

net/ipv6/udp_offload.c

··· 112 112 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); 113 113 fptr->nexthdr = nexthdr; 114 114 fptr->reserved = 0; 115 - if (skb_shinfo(skb)->ip6_frag_id) 116 - fptr->identification = skb_shinfo(skb)->ip6_frag_id; 117 - else 118 - ipv6_select_ident(fptr, 119 - (struct rt6_info *)skb_dst(skb)); 115 + if (!skb_shinfo(skb)->ip6_frag_id) 116 + ipv6_proxy_select_ident(skb); 117 + fptr->identification = skb_shinfo(skb)->ip6_frag_id; 120 118 121 119 /* Fragment the skb. ipv6 header and the remaining fields of the 122 120 * fragment header are updated in ipv6_gso_segment()

+24

net/netfilter/nf_log.c

··· 212 212 } 213 213 EXPORT_SYMBOL(nf_log_packet); 214 214 215 + void nf_log_trace(struct net *net, 216 + u_int8_t pf, 217 + unsigned int hooknum, 218 + const struct sk_buff *skb, 219 + const struct net_device *in, 220 + const struct net_device *out, 221 + const struct nf_loginfo *loginfo, const char *fmt, ...) 222 + { 223 + va_list args; 224 + char prefix[NF_LOG_PREFIXLEN]; 225 + const struct nf_logger *logger; 226 + 227 + rcu_read_lock(); 228 + logger = rcu_dereference(net->nf.nf_loggers[pf]); 229 + if (logger) { 230 + va_start(args, fmt); 231 + vsnprintf(prefix, sizeof(prefix), fmt, args); 232 + va_end(args); 233 + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); 234 + } 235 + rcu_read_unlock(); 236 + } 237 + EXPORT_SYMBOL(nf_log_trace); 238 + 215 239 #define S_SIZE (1024 - (sizeof(unsigned int) + 1)) 216 240 217 241 struct nf_log_buf {

+4 -1

net/netfilter/nf_tables_api.c

··· 1225 1225 1226 1226 if (nla[NFTA_CHAIN_POLICY]) { 1227 1227 if ((chain != NULL && 1228 - !(chain->flags & NFT_BASE_CHAIN)) || 1228 + !(chain->flags & NFT_BASE_CHAIN))) 1229 + return -EOPNOTSUPP; 1230 + 1231 + if (chain == NULL && 1229 1232 nla[NFTA_CHAIN_HOOK] == NULL) 1230 1233 return -EOPNOTSUPP; 1231 1234

+4 -4

net/netfilter/nf_tables_core.c

··· 94 94 { 95 95 struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); 96 96 97 - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, 98 - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", 99 - chain->table->name, chain->name, comments[type], 100 - rulenum); 97 + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, 98 + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", 99 + chain->table->name, chain->name, comments[type], 100 + rulenum); 101 101 } 102 102 103 103 unsigned int

+3

net/netfilter/nfnetlink_cthelper.c

··· 77 77 if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) 78 78 return -EINVAL; 79 79 80 + /* Not all fields are initialized so first zero the tuple */ 81 + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); 82 + 80 83 tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); 81 84 tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); 82 85

+6

net/netfilter/nft_compat.c

··· 133 133 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; 134 134 break; 135 135 case AF_INET6: 136 + if (proto) 137 + entry->e6.ipv6.flags |= IP6T_F_PROTO; 138 + 136 139 entry->e6.ipv6.proto = proto; 137 140 entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; 138 141 break; ··· 347 344 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; 348 345 break; 349 346 case AF_INET6: 347 + if (proto) 348 + entry->e6.ipv6.flags |= IP6T_F_PROTO; 349 + 350 350 entry->e6.ipv6.proto = proto; 351 351 entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; 352 352 break;

+2

net/netfilter/nft_hash.c

··· 153 153 iter->err = err; 154 154 goto out; 155 155 } 156 + 157 + continue; 156 158 } 157 159 158 160 if (iter->count < iter->skip)

+2 -2

net/netfilter/xt_TPROXY.c

··· 513 513 { 514 514 const struct ip6t_ip6 *i = par->entryinfo; 515 515 516 - if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) 517 - && !(i->flags & IP6T_INV_PROTO)) 516 + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && 517 + !(i->invflags & IP6T_INV_PROTO)) 518 518 return 0; 519 519 520 520 pr_info("Can be used only in combination with "

+4

net/socket.c

··· 1702 1702 1703 1703 if (len > INT_MAX) 1704 1704 len = INT_MAX; 1705 + if (unlikely(!access_ok(VERIFY_READ, buff, len))) 1706 + return -EFAULT; 1705 1707 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1706 1708 if (!sock) 1707 1709 goto out; ··· 1762 1760 1763 1761 if (size > INT_MAX) 1764 1762 size = INT_MAX; 1763 + if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) 1764 + return -EFAULT; 1765 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1766 1766 if (!sock) 1767 1767 goto out;

+8

tools/testing/selftests/Makefile

··· 22 22 TARGETS_HOTPLUG = cpu-hotplug 23 23 TARGETS_HOTPLUG += memory-hotplug 24 24 25 + # Clear LDFLAGS and MAKEFLAGS if called from main 26 + # Makefile to avoid test build failures when test 27 + # Makefile doesn't have explicit build rules. 28 + ifeq (1,$(MAKELEVEL)) 29 + undefine LDFLAGS 30 + override MAKEFLAGS = 31 + endif 32 + 25 33 all: 26 34 for TARGET in $(TARGETS); do \ 27 35 make -C $$TARGET; \

+7 -7

virt/kvm/kvm_main.c

··· 471 471 BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); 472 472 473 473 r = -ENOMEM; 474 - kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 474 + kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); 475 475 if (!kvm->memslots) 476 476 goto out_err_no_srcu; 477 477 ··· 522 522 out_err_no_disable: 523 523 for (i = 0; i < KVM_NR_BUSES; i++) 524 524 kfree(kvm->buses[i]); 525 - kfree(kvm->memslots); 525 + kvfree(kvm->memslots); 526 526 kvm_arch_free_vm(kvm); 527 527 return ERR_PTR(r); 528 528 } ··· 578 578 kvm_for_each_memslot(memslot, slots) 579 579 kvm_free_physmem_slot(kvm, memslot, NULL); 580 580 581 - kfree(kvm->memslots); 581 + kvfree(kvm->memslots); 582 582 } 583 583 584 584 static void kvm_destroy_devices(struct kvm *kvm) ··· 871 871 goto out_free; 872 872 } 873 873 874 - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), 875 - GFP_KERNEL); 874 + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); 876 875 if (!slots) 877 876 goto out_free; 877 + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 878 878 879 879 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 880 880 slot = id_to_memslot(slots, mem->slot); ··· 917 917 kvm_arch_commit_memory_region(kvm, mem, &old, change); 918 918 919 919 kvm_free_physmem_slot(kvm, &old, &new); 920 - kfree(old_memslots); 920 + kvfree(old_memslots); 921 921 922 922 /* 923 923 * IOMMU mapping: New slots need to be mapped. Old slots need to be ··· 936 936 return 0; 937 937 938 938 out_slots: 939 - kfree(slots); 939 + kvfree(slots); 940 940 out_free: 941 941 kvm_free_physmem_slot(kvm, &new, &old); 942 942 out: