Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cputime: Generic on-demand virtual cputime accounting

If we want to stop the tick further than idle, we need to be
able to account the cputime without using the tick.

Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.

However, implementing CONFIG_VIRT_CPU_ACCOUNTING requires
low level hooks and involves more overhead. But we already
have a generic context tracking subsystem that is required
for RCU needs by archs which plan to shut down the tick
outside idle.

This patch implements a generic virtual based cputime
accounting that relies on these generic kernel/user hooks.

There are some upsides of doing this:

- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if context tracking is already built (already necessary for RCU in full
tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.

And one downside:

- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>

+160 -54
+3 -3
arch/ia64/include/asm/cputime.h
··· 11 11 * as published by the Free Software Foundation; either version 12 12 * 2 of the License, or (at your option) any later version. 13 13 * 14 - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec. 14 + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. 15 15 * Otherwise we measure cpu time in jiffies using the generic definitions. 16 16 */ 17 17 18 18 #ifndef __IA64_CPUTIME_H 19 19 #define __IA64_CPUTIME_H 20 20 21 - #ifndef CONFIG_VIRT_CPU_ACCOUNTING 21 + #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 22 22 # include <asm-generic/cputime.h> 23 23 #else 24 24 # include <asm/processor.h> 25 25 # include <asm-generic/cputime_nsecs.h> 26 26 extern void arch_vtime_task_switch(struct task_struct *tsk); 27 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 27 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 28 28 29 29 #endif /* __IA64_CPUTIME_H */
+2 -2
arch/ia64/include/asm/thread_info.h
··· 31 31 mm_segment_t addr_limit; /* user-level address space limit */ 32 32 int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ 33 33 struct restart_block restart_block; 34 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 34 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 35 35 __u64 ac_stamp; 36 36 __u64 ac_leave; 37 37 __u64 ac_stime; ··· 69 69 #define task_stack_page(tsk) ((void *)(tsk)) 70 70 71 71 #define __HAVE_THREAD_FUNCTIONS 72 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 72 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 73 73 #define setup_thread_stack(p, org) \ 74 74 *task_thread_info(p) = *task_thread_info(org); \ 75 75 task_thread_info(p)->ac_stime = 0; \
+1 -1
arch/ia64/include/asm/xen/minstate.h
··· 1 1 2 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 2 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 3 3 /* read ar.itc in advance, and use it before leaving bank 0 */ 4 4 #define XEN_ACCOUNT_GET_STAMP \ 5 5 MOV_FROM_ITC(pUStk, p6, r20, r2);
+1 -1
arch/ia64/kernel/asm-offsets.c
··· 41 41 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); 42 42 DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); 43 43 DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); 44 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 44 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 45 45 DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); 46 46 DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); 47 47 DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+8 -8
arch/ia64/kernel/entry.S
··· 724 724 #endif 725 725 .global __paravirt_work_processed_syscall; 726 726 __paravirt_work_processed_syscall: 727 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 727 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 728 728 adds r2=PT(LOADRS)+16,r12 729 729 MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave 730 730 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 ··· 762 762 763 763 ld8 r29=[r2],16 // M0|1 load cr.ipsr 764 764 ld8 r28=[r3],16 // M0|1 load cr.iip 765 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 765 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 766 766 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 767 767 ;; 768 768 ld8 r30=[r2],16 // M0|1 load cr.ifs ··· 793 793 ld8.fill r1=[r3],16 // M0|1 load r1 794 794 (pUStk) mov r17=1 // A 795 795 ;; 796 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 796 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 797 797 (pUStk) st1 [r15]=r17 // M2|3 798 798 #else 799 799 (pUStk) st1 [r14]=r17 // M2|3 ··· 813 813 shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition 814 814 COVER // B add current frame into dirty partition & set cr.ifs 815 815 ;; 816 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 816 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 817 817 mov r19=ar.bsp // M2 get new backing store pointer 818 818 st8 [r14]=r22 // M save time at leave 819 819 mov f10=f0 // F clear f10 ··· 948 948 adds r16=PT(CR_IPSR)+16,r12 949 949 adds r17=PT(CR_IIP)+16,r12 950 950 951 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 951 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 952 952 .pred.rel.mutex pUStk,pKStk 953 953 MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled 954 954 MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave ··· 981 981 ;; 982 982 ld8.fill r12=[r16],16 983 983 ld8.fill r13=[r17],16 984 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 984 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 985 985 (pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 986 986 #else 987 987 (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 ··· 989 989 ;; 990 990 ld8 r20=[r16],16 // 
ar.fpsr 991 991 ld8.fill r15=[r17],16 992 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 992 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 993 993 (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred 994 994 #endif 995 995 ;; ··· 997 997 ld8.fill r2=[r17] 998 998 (pUStk) mov r17=1 999 999 ;; 1000 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 1000 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 1001 1001 // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; 1002 1002 // mib : mov add br -> mib : ld8 add br 1003 1003 // bbb_ : br nop cover;; mbb_ : mov br cover;;
+2 -2
arch/ia64/kernel/fsys.S
··· 529 529 nop.i 0 530 530 ;; 531 531 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 532 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 532 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 533 533 MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting 534 534 #else 535 535 nop.m 0 ··· 555 555 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 556 556 br.call.sptk.many b7=ia64_syscall_setup // B 557 557 ;; 558 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 558 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 559 559 // mov.m r30=ar.itc is called in advance 560 560 add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 561 561 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+2 -2
arch/ia64/kernel/head.S
··· 1073 1073 sched_clock = ia64_native_sched_clock 1074 1074 #endif 1075 1075 1076 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 1076 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 1077 1077 GLOBAL_ENTRY(cycle_to_cputime) 1078 1078 alloc r16=ar.pfs,1,0,0,0 1079 1079 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 ··· 1091 1091 shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT 1092 1092 br.ret.sptk.many rp 1093 1093 END(cycle_to_cputime) 1094 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 1094 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 1095 1095 1096 1096 #ifdef CONFIG_IA64_BRL_EMU 1097 1097
+4 -4
arch/ia64/kernel/ivt.S
··· 784 784 785 785 (p8) adds r28=16,r28 // A switch cr.iip to next bundle 786 786 (p9) adds r8=1,r8 // A increment ei to next slot 787 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 787 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 788 788 ;; 789 789 mov b6=r30 // I0 setup syscall handler branch reg early 790 790 #else ··· 801 801 // 802 802 /////////////////////////////////////////////////////////////////////// 803 803 st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag 804 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 804 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 805 805 MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting 806 806 #else 807 807 mov b6=r30 // I0 setup syscall handler branch reg early ··· 817 817 cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 818 818 br.call.sptk.many b7=ia64_syscall_setup // B 819 819 1: 820 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 820 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 821 821 // mov.m r30=ar.itc is called in advance, and r13 is current 822 822 add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A 823 823 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A ··· 1043 1043 DBG_FAULT(16) 1044 1044 FAULT(16) 1045 1045 1046 - #if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) 1046 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) 1047 1047 /* 1048 1048 * There is no particular reason for this code to be here, other than 1049 1049 * that there happens to be space here that would go unused otherwise.
+1 -1
arch/ia64/kernel/minstate.h
··· 4 4 #include "entry.h" 5 5 #include "paravirt_inst.h" 6 6 7 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 7 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 8 8 /* read ar.itc in advance, and use it before leaving bank 0 */ 9 9 #define ACCOUNT_GET_STAMP \ 10 10 (pUStk) mov.m r20=ar.itc;
+2 -2
arch/ia64/kernel/time.c
··· 77 77 }; 78 78 static struct clocksource *itc_clocksource; 79 79 80 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 80 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 81 81 82 82 #include <linux/kernel_stat.h> 83 83 ··· 142 142 account_idle_time(vtime_delta(tsk)); 143 143 } 144 144 145 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 145 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 146 146 147 147 static irqreturn_t 148 148 timer_interrupt (int irq, void *dev_id)
+1 -1
arch/powerpc/configs/chroma_defconfig
··· 1 1 CONFIG_PPC64=y 2 2 CONFIG_PPC_BOOK3E_64=y 3 - # CONFIG_VIRT_CPU_ACCOUNTING is not set 3 + # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set 4 4 CONFIG_SMP=y 5 5 CONFIG_NR_CPUS=256 6 6 CONFIG_EXPERIMENTAL=y
+1 -1
arch/powerpc/configs/corenet64_smp_defconfig
··· 1 1 CONFIG_PPC64=y 2 2 CONFIG_PPC_BOOK3E_64=y 3 - # CONFIG_VIRT_CPU_ACCOUNTING is not set 3 + # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set 4 4 CONFIG_SMP=y 5 5 CONFIG_NR_CPUS=2 6 6 CONFIG_EXPERIMENTAL=y
+1 -1
arch/powerpc/configs/pasemi_defconfig
··· 1 1 CONFIG_PPC64=y 2 2 CONFIG_ALTIVEC=y 3 - # CONFIG_VIRT_CPU_ACCOUNTING is not set 3 + # CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set 4 4 CONFIG_SMP=y 5 5 CONFIG_NR_CPUS=2 6 6 CONFIG_EXPERIMENTAL=y
+3 -3
arch/powerpc/include/asm/cputime.h
··· 8 8 * as published by the Free Software Foundation; either version 9 9 * 2 of the License, or (at your option) any later version. 10 10 * 11 - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in 11 + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in 12 12 * the same units as the timebase. Otherwise we measure cpu time 13 13 * in jiffies using the generic definitions. 14 14 */ ··· 16 16 #ifndef __POWERPC_CPUTIME_H 17 17 #define __POWERPC_CPUTIME_H 18 18 19 - #ifndef CONFIG_VIRT_CPU_ACCOUNTING 19 + #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 20 20 #include <asm-generic/cputime.h> 21 21 #ifdef __KERNEL__ 22 22 static inline void setup_cputime_one_jiffy(void) { } ··· 231 231 static inline void arch_vtime_task_switch(struct task_struct *tsk) { } 232 232 233 233 #endif /* __KERNEL__ */ 234 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 234 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 235 235 #endif /* __POWERPC_CPUTIME_H */
+1 -1
arch/powerpc/include/asm/lppaca.h
··· 145 145 extern struct kmem_cache *dtl_cache; 146 146 147 147 /* 148 - * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls 148 + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls 149 149 * reading from the dispatch trace log. If other code wants to consume 150 150 * DTL entries, it can set this pointer to a function that will get 151 151 * called once for each DTL entry that gets processed.
+2 -2
arch/powerpc/include/asm/ppc_asm.h
··· 24 24 * user_time and system_time fields in the paca. 25 25 */ 26 26 27 - #ifndef CONFIG_VIRT_CPU_ACCOUNTING 27 + #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 28 28 #define ACCOUNT_CPU_USER_ENTRY(ra, rb) 29 29 #define ACCOUNT_CPU_USER_EXIT(ra, rb) 30 30 #define ACCOUNT_STOLEN_TIME ··· 70 70 71 71 #endif /* CONFIG_PPC_SPLPAR */ 72 72 73 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 73 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 74 74 75 75 /* 76 76 * Macros for storing registers into and loading registers from
+2 -2
arch/powerpc/kernel/entry_64.S
··· 94 94 addi r9,r1,STACK_FRAME_OVERHEAD 95 95 ld r11,exception_marker@toc(r2) 96 96 std r11,-16(r9) /* "regshere" marker */ 97 - #if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) 97 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) 98 98 BEGIN_FW_FTR_SECTION 99 99 beq 33f 100 100 /* if from user, see if there are any DTL entries to process */ ··· 110 110 addi r9,r1,STACK_FRAME_OVERHEAD 111 111 33: 112 112 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) 113 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ 113 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ 114 114 115 115 /* 116 116 * A syscall should always be called with interrupts enabled
+2 -2
arch/powerpc/kernel/time.c
··· 143 143 unsigned long ppc_tb_freq; 144 144 EXPORT_SYMBOL_GPL(ppc_tb_freq); 145 145 146 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 146 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 147 147 /* 148 148 * Factors for converting from cputime_t (timebase ticks) to 149 149 * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). ··· 377 377 account_user_time(tsk, utime, utimescaled); 378 378 } 379 379 380 - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ 380 + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 381 381 #define calc_cputime_factors() 382 382 #endif 383 383
+3 -3
arch/powerpc/platforms/pseries/dtl.c
··· 57 57 */ 58 58 static int dtl_buf_entries = N_DISPATCH_LOG; 59 59 60 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 60 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 61 61 struct dtl_ring { 62 62 u64 write_index; 63 63 struct dtl_entry *write_ptr; ··· 142 142 return per_cpu(dtl_rings, dtl->cpu).write_index; 143 143 } 144 144 145 - #else /* CONFIG_VIRT_CPU_ACCOUNTING */ 145 + #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 146 146 147 147 static int dtl_start(struct dtl *dtl) 148 148 { ··· 188 188 { 189 189 return lppaca_of(dtl->cpu).dtl_idx; 190 190 } 191 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 191 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 192 192 193 193 static int dtl_enable(struct dtl *dtl) 194 194 {
+3 -3
arch/powerpc/platforms/pseries/setup.c
··· 281 281 282 282 struct kmem_cache *dtl_cache; 283 283 284 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 284 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 285 285 /* 286 286 * Allocate space for the dispatch trace log for all possible cpus 287 287 * and register the buffers with the hypervisor. This is used for ··· 332 332 333 333 return 0; 334 334 } 335 - #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ 335 + #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 336 336 static inline int alloc_dispatch_logs(void) 337 337 { 338 338 return 0; 339 339 } 340 - #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 340 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 341 341 342 342 static int alloc_dispatch_log_kmem_cache(void) 343 343 {
+7 -1
include/asm-generic/cputime.h
··· 4 4 #include <linux/time.h> 5 5 #include <linux/jiffies.h> 6 6 7 - #include <asm-generic/cputime_jiffies.h> 7 + #ifndef CONFIG_VIRT_CPU_ACCOUNTING 8 + # include <asm-generic/cputime_jiffies.h> 9 + #endif 10 + 11 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 12 + # include <asm-generic/cputime_nsecs.h> 13 + #endif 8 14 9 15 #endif
+8
include/asm-generic/cputime_nsecs.h
··· 26 26 */ 27 27 #define cputime_to_jiffies(__ct) \ 28 28 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) 29 + #define cputime_to_scaled(__ct) (__ct) 29 30 #define jiffies_to_cputime(__jif) \ 30 31 (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) 31 32 #define cputime64_to_jiffies64(__ct) \ 32 33 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) 33 34 #define jiffies64_to_cputime64(__jif) \ 34 35 (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) 36 + 37 + 38 + /* 39 + * Convert cputime <-> nanoseconds 40 + */ 41 + #define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) 42 + 35 43 36 44 /* 37 45 * Convert cputime <-> microseconds
+1 -1
include/linux/kernel_stat.h
··· 127 127 extern void account_steal_time(cputime_t); 128 128 extern void account_idle_time(cputime_t); 129 129 130 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING 130 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 131 131 static inline void account_process_tick(struct task_struct *tsk, int user) 132 132 { 133 133 vtime_account_user(tsk);
+16
include/linux/vtime.h
··· 14 14 static inline void vtime_task_switch(struct task_struct *prev) { } 15 15 static inline void vtime_account_system(struct task_struct *tsk) { } 16 16 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } 17 + static inline void vtime_account_user(struct task_struct *tsk) { } 17 18 static inline void vtime_account(struct task_struct *tsk) { } 19 + #endif 20 + 21 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 22 + static inline void arch_vtime_task_switch(struct task_struct *tsk) { } 23 + static inline void vtime_user_enter(struct task_struct *tsk) 24 + { 25 + vtime_account_system(tsk); 26 + } 27 + static inline void vtime_user_exit(struct task_struct *tsk) 28 + { 29 + vtime_account_user(tsk); 30 + } 31 + #else 32 + static inline void vtime_user_enter(struct task_struct *tsk) { } 33 + static inline void vtime_user_exit(struct task_struct *tsk) { } 18 34 #endif 19 35 20 36 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+22 -1
init/Kconfig
··· 326 326 327 327 menu "CPU/Task time and stats accounting" 328 328 329 + config VIRT_CPU_ACCOUNTING 330 + bool 331 + 329 332 choice 330 333 prompt "Cputime accounting" 331 334 default TICK_CPU_ACCOUNTING if !PPC64 ··· 345 342 346 343 If unsure, say Y. 347 344 348 - config VIRT_CPU_ACCOUNTING 345 + config VIRT_CPU_ACCOUNTING_NATIVE 349 346 bool "Deterministic task and CPU time accounting" 350 347 depends on HAVE_VIRT_CPU_ACCOUNTING 348 + select VIRT_CPU_ACCOUNTING 351 349 help 352 350 Select this option to enable more accurate task and CPU time 353 351 accounting. This is done by reading a CPU counter on each ··· 357 353 small performance impact. In the case of s390 or IBM POWER > 5, 358 354 this also enables accounting of stolen time on logically-partitioned 359 355 systems. 356 + 357 + config VIRT_CPU_ACCOUNTING_GEN 358 + bool "Full dynticks CPU time accounting" 359 + depends on HAVE_CONTEXT_TRACKING && 64BIT 360 + select VIRT_CPU_ACCOUNTING 361 + select CONTEXT_TRACKING 362 + help 363 + Select this option to enable task and CPU time accounting on full 364 + dynticks systems. This accounting is implemented by watching every 365 + kernel-user boundaries using the context tracking subsystem. 366 + The accounting is thus performed at the expense of some significant 367 + overhead. 368 + 369 + For now this is only useful if you are working on the full 370 + dynticks subsystem development. 371 + 372 + If unsure, say N. 360 373 361 374 config IRQ_TIME_ACCOUNTING 362 375 bool "Fine granularity task level IRQ time accounting"
+4 -2
kernel/context_tracking.c
··· 30 30 local_irq_save(flags); 31 31 if (__this_cpu_read(context_tracking.active) && 32 32 __this_cpu_read(context_tracking.state) != IN_USER) { 33 - __this_cpu_write(context_tracking.state, IN_USER); 33 + vtime_user_enter(current); 34 34 rcu_user_enter(); 35 + __this_cpu_write(context_tracking.state, IN_USER); 35 36 } 36 37 local_irq_restore(flags); 37 38 } ··· 54 53 55 54 local_irq_save(flags); 56 55 if (__this_cpu_read(context_tracking.state) == IN_USER) { 57 - __this_cpu_write(context_tracking.state, IN_KERNEL); 58 56 rcu_user_exit(); 57 + vtime_user_exit(current); 58 + __this_cpu_write(context_tracking.state, IN_KERNEL); 59 59 } 60 60 local_irq_restore(flags); 61 61 }
+57 -4
kernel/sched/cputime.c
··· 3 3 #include <linux/tsacct_kern.h> 4 4 #include <linux/kernel_stat.h> 5 5 #include <linux/static_key.h> 6 + #include <linux/context_tracking.h> 6 7 #include "sched.h" 7 8 8 9 ··· 480 479 else 481 480 vtime_account_system(prev); 482 481 482 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 483 483 vtime_account_user(prev); 484 + #endif 484 485 arch_vtime_task_switch(prev); 485 486 } 486 487 #endif ··· 498 495 #ifndef __ARCH_HAS_VTIME_ACCOUNT 499 496 void vtime_account(struct task_struct *tsk) 500 497 { 501 - if (in_interrupt() || !is_idle_task(tsk)) 502 - vtime_account_system(tsk); 503 - else 504 - vtime_account_idle(tsk); 498 + if (!in_interrupt()) { 499 + /* 500 + * If we interrupted user, context_tracking_in_user() 501 + * is 1 because the context tracking don't hook 502 + * on irq entry/exit. This way we know if 503 + * we need to flush user time on kernel entry. 504 + */ 505 + if (context_tracking_in_user()) { 506 + vtime_account_user(tsk); 507 + return; 508 + } 509 + 510 + if (is_idle_task(tsk)) { 511 + vtime_account_idle(tsk); 512 + return; 513 + } 514 + } 515 + vtime_account_system(tsk); 505 516 } 506 517 EXPORT_SYMBOL_GPL(vtime_account); 507 518 #endif /* __ARCH_HAS_VTIME_ACCOUNT */ ··· 600 583 cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); 601 584 } 602 585 #endif 586 + 587 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 588 + static DEFINE_PER_CPU(unsigned long long, cputime_snap); 589 + 590 + static cputime_t get_vtime_delta(void) 591 + { 592 + unsigned long long delta; 593 + 594 + delta = sched_clock() - __this_cpu_read(cputime_snap); 595 + __this_cpu_add(cputime_snap, delta); 596 + 597 + /* CHECKME: always safe to convert nsecs to cputime? 
*/ 598 + return nsecs_to_cputime(delta); 599 + } 600 + 601 + void vtime_account_system(struct task_struct *tsk) 602 + { 603 + cputime_t delta_cpu = get_vtime_delta(); 604 + 605 + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); 606 + } 607 + 608 + void vtime_account_user(struct task_struct *tsk) 609 + { 610 + cputime_t delta_cpu = get_vtime_delta(); 611 + 612 + account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 613 + } 614 + 615 + void vtime_account_idle(struct task_struct *tsk) 616 + { 617 + cputime_t delta_cpu = get_vtime_delta(); 618 + 619 + account_idle_time(delta_cpu); 620 + } 621 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */