Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

context_tracking: New context tracking subsystem

Create a new subsystem that probes on kernel boundaries
to keep track of the transitions between contexts, with
two basic initial contexts: user or kernel.

This is an abstraction of some RCU code that uses such tracking
to implement its userspace extended quiescent state.

We need to pull this up from RCU into this new level of indirection
because this tracking is also going to be used to implement an "on
demand" generic virtual cputime accounting. A necessary step to
shutdown the tick while still accounting the cputime.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
[ paulmck: fix whitespace error and email address. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

authored by

Frederic Weisbecker and committed by
Paul E. McKenney
91d1aa43 4e79752c

+150 -108
+8 -7
arch/Kconfig
··· 300 300 301 301 See Documentation/prctl/seccomp_filter.txt for details. 302 302 303 - config HAVE_RCU_USER_QS 303 + config HAVE_CONTEXT_TRACKING 304 304 bool 305 305 help 306 - Provide kernel entry/exit hooks necessary for userspace 307 - RCU extended quiescent state. Syscalls need to be wrapped inside 308 - rcu_user_exit()-rcu_user_enter() through the slow path using 309 - TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs 310 - are already protected inside rcu_irq_enter/rcu_irq_exit() but 311 - preemption or signal handling on irq exit still need to be protected. 306 + Provide kernel/user boundaries probes necessary for subsystems 307 + that need it, such as userspace RCU extended quiescent state. 308 + Syscalls need to be wrapped inside user_exit()-user_enter() through 309 + the slow path using TIF_NOHZ flag. Exceptions handlers must be 310 + wrapped as well. Irqs are already protected inside 311 + rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on 312 + irq exit still need to be protected. 312 313 313 314 config HAVE_VIRT_CPU_ACCOUNTING 314 315 bool
+1 -1
arch/x86/Kconfig
··· 106 106 select KTIME_SCALAR if X86_32 107 107 select GENERIC_STRNCPY_FROM_USER 108 108 select GENERIC_STRNLEN_USER 109 - select HAVE_RCU_USER_QS if X86_64 109 + select HAVE_CONTEXT_TRACKING if X86_64 110 110 select HAVE_IRQ_TIME_ACCOUNTING 111 111 select GENERIC_KERNEL_THREAD 112 112 select GENERIC_KERNEL_EXECVE
+7 -8
arch/x86/include/asm/rcu.h arch/x86/include/asm/context_tracking.h
··· 1 - #ifndef _ASM_X86_RCU_H 2 - #define _ASM_X86_RCU_H 1 + #ifndef _ASM_X86_CONTEXT_TRACKING_H 2 + #define _ASM_X86_CONTEXT_TRACKING_H 3 3 4 4 #ifndef __ASSEMBLY__ 5 - 6 - #include <linux/rcupdate.h> 5 + #include <linux/context_tracking.h> 7 6 #include <asm/ptrace.h> 8 7 9 8 static inline void exception_enter(struct pt_regs *regs) 10 9 { 11 - rcu_user_exit(); 10 + user_exit(); 12 11 } 13 12 14 13 static inline void exception_exit(struct pt_regs *regs) 15 14 { 16 - #ifdef CONFIG_RCU_USER_QS 15 + #ifdef CONFIG_CONTEXT_TRACKING 17 16 if (user_mode(regs)) 18 - rcu_user_enter(); 17 + user_enter(); 19 18 #endif 20 19 } 21 20 22 21 #else /* __ASSEMBLY__ */ 23 22 24 - #ifdef CONFIG_RCU_USER_QS 23 + #ifdef CONFIG_CONTEXT_TRACKING 25 24 # define SCHEDULE_USER call schedule_user 26 25 #else 27 26 # define SCHEDULE_USER call schedule
+1 -1
arch/x86/kernel/entry_64.S
··· 56 56 #include <asm/ftrace.h> 57 57 #include <asm/percpu.h> 58 58 #include <asm/asm.h> 59 - #include <asm/rcu.h> 59 + #include <asm/context_tracking.h> 60 60 #include <asm/smap.h> 61 61 #include <linux/err.h> 62 62
+4 -4
arch/x86/kernel/ptrace.c
··· 21 21 #include <linux/signal.h> 22 22 #include <linux/perf_event.h> 23 23 #include <linux/hw_breakpoint.h> 24 - #include <linux/rcupdate.h> 24 + #include <linux/context_tracking.h> 25 25 26 26 #include <asm/uaccess.h> 27 27 #include <asm/pgtable.h> ··· 1461 1461 { 1462 1462 long ret = 0; 1463 1463 1464 - rcu_user_exit(); 1464 + user_exit(); 1465 1465 1466 1466 /* 1467 1467 * If we stepped into a sysenter/syscall insn, it trapped in ··· 1516 1516 * or do_notify_resume(), in which case we can be in RCU 1517 1517 * user mode. 1518 1518 */ 1519 - rcu_user_exit(); 1519 + user_exit(); 1520 1520 1521 1521 audit_syscall_exit(regs); 1522 1522 ··· 1534 1534 if (step || test_thread_flag(TIF_SYSCALL_TRACE)) 1535 1535 tracehook_report_syscall_exit(regs, step); 1536 1536 1537 - rcu_user_enter(); 1537 + user_enter(); 1538 1538 }
+3 -2
arch/x86/kernel/signal.c
··· 22 22 #include <linux/uaccess.h> 23 23 #include <linux/user-return-notifier.h> 24 24 #include <linux/uprobes.h> 25 + #include <linux/context_tracking.h> 25 26 26 27 #include <asm/processor.h> 27 28 #include <asm/ucontext.h> ··· 817 816 void 818 817 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 819 818 { 820 - rcu_user_exit(); 819 + user_exit(); 821 820 822 821 #ifdef CONFIG_X86_MCE 823 822 /* notify userspace of pending MCEs */ ··· 841 840 if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) 842 841 fire_user_return_notifiers(); 843 842 844 - rcu_user_enter(); 843 + user_enter(); 845 844 } 846 845 847 846 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+1 -1
arch/x86/kernel/traps.c
··· 55 55 #include <asm/i387.h> 56 56 #include <asm/fpu-internal.h> 57 57 #include <asm/mce.h> 58 - #include <asm/rcu.h> 58 + #include <asm/context_tracking.h> 59 59 60 60 #include <asm/mach_traps.h> 61 61
+1 -1
arch/x86/mm/fault.c
··· 18 18 #include <asm/pgalloc.h> /* pgd_*(), ... */ 19 19 #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 20 20 #include <asm/fixmap.h> /* VSYSCALL_START */ 21 - #include <asm/rcu.h> /* exception_enter(), ... */ 21 + #include <asm/context_tracking.h> /* exception_enter(), ... */ 22 22 23 23 /* 24 24 * Page fault error code bits:
+18
include/linux/context_tracking.h
··· 1 + #ifndef _LINUX_CONTEXT_TRACKING_H 2 + #define _LINUX_CONTEXT_TRACKING_H 3 + 4 + #ifdef CONFIG_CONTEXT_TRACKING 5 + #include <linux/sched.h> 6 + 7 + extern void user_enter(void); 8 + extern void user_exit(void); 9 + extern void context_tracking_task_switch(struct task_struct *prev, 10 + struct task_struct *next); 11 + #else 12 + static inline void user_enter(void) { } 13 + static inline void user_exit(void) { } 14 + static inline void context_tracking_task_switch(struct task_struct *prev, 15 + struct task_struct *next) { } 16 + #endif /* !CONFIG_CONTEXT_TRACKING */ 17 + 18 + #endif
-2
include/linux/rcupdate.h
··· 222 222 extern void rcu_user_exit(void); 223 223 extern void rcu_user_enter_after_irq(void); 224 224 extern void rcu_user_exit_after_irq(void); 225 - extern void rcu_user_hooks_switch(struct task_struct *prev, 226 - struct task_struct *next); 227 225 #else 228 226 static inline void rcu_user_enter(void) { } 229 227 static inline void rcu_user_exit(void) { }
+14 -14
init/Kconfig
··· 486 486 This option enables preemptible-RCU code that is common between 487 487 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 488 488 489 + config CONTEXT_TRACKING 490 + bool 491 + 489 492 config RCU_USER_QS 490 493 bool "Consider userspace as in RCU extended quiescent state" 491 - depends on HAVE_RCU_USER_QS && SMP 494 + depends on HAVE_CONTEXT_TRACKING && SMP 495 + select CONTEXT_TRACKING 492 496 help 493 497 This option sets hooks on kernel / userspace boundaries and 494 498 puts RCU in extended quiescent state when the CPU runs in ··· 501 497 try to keep the timer tick on for RCU. 502 498 503 499 Unless you want to hack and help the development of the full 504 - tickless feature, you shouldn't enable this option. It also 500 + dynticks mode, you shouldn't enable this option. It also 505 501 adds unnecessary overhead. 506 502 507 503 If unsure say N 508 504 509 - config RCU_USER_QS_FORCE 510 - bool "Force userspace extended QS by default" 511 - depends on RCU_USER_QS 505 + config CONTEXT_TRACKING_FORCE 506 + bool "Force context tracking" 507 + depends on CONTEXT_TRACKING 512 508 help 513 - Set the hooks in user/kernel boundaries by default in order to 514 - test this feature that treats userspace as an extended quiescent 515 - state until we have a real user like a full adaptive nohz option. 516 - 517 - Unless you want to hack and help the development of the full 518 - tickless feature, you shouldn't enable this option. It adds 519 - unnecessary overhead. 520 - 521 - If unsure say N 509 + Probe on user/kernel boundaries by default in order to 510 + test the features that rely on it such as userspace RCU extended 511 + quiescent states. 512 + This test is there for debugging until we have a real user like the 513 + full dynticks mode. 522 514 523 515 config RCU_FANOUT 524 516 int "Tree-based hierarchical RCU fanout value"
+1
kernel/Makefile
··· 110 110 obj-$(CONFIG_PADATA) += padata.o 111 111 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 112 112 obj-$(CONFIG_JUMP_LABEL) += jump_label.o 113 + obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o 113 114 114 115 $(obj)/configs.o: $(obj)/config_data.h 115 116
+83
kernel/context_tracking.c
··· 1 + #include <linux/context_tracking.h> 2 + #include <linux/rcupdate.h> 3 + #include <linux/sched.h> 4 + #include <linux/percpu.h> 5 + #include <linux/hardirq.h> 6 + 7 + struct context_tracking { 8 + /* 9 + * When active is false, hooks are not set to 10 + * minimize overhead: TIF flags are cleared 11 + * and calls to user_enter/exit are ignored. This 12 + * may be further optimized using static keys. 13 + */ 14 + bool active; 15 + enum { 16 + IN_KERNEL = 0, 17 + IN_USER, 18 + } state; 19 + }; 20 + 21 + static DEFINE_PER_CPU(struct context_tracking, context_tracking) = { 22 + #ifdef CONFIG_CONTEXT_TRACKING_FORCE 23 + .active = true, 24 + #endif 25 + }; 26 + 27 + void user_enter(void) 28 + { 29 + unsigned long flags; 30 + 31 + /* 32 + * Some contexts may involve an exception occuring in an irq, 33 + * leading to that nesting: 34 + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() 35 + * This would mess up the dyntick_nesting count though. And rcu_irq_*() 36 + * helpers are enough to protect RCU uses inside the exception. So 37 + * just return immediately if we detect we are in an IRQ. 38 + */ 39 + if (in_interrupt()) 40 + return; 41 + 42 + WARN_ON_ONCE(!current->mm); 43 + 44 + local_irq_save(flags); 45 + if (__this_cpu_read(context_tracking.active) && 46 + __this_cpu_read(context_tracking.state) != IN_USER) { 47 + __this_cpu_write(context_tracking.state, IN_USER); 48 + rcu_user_enter(); 49 + } 50 + local_irq_restore(flags); 51 + } 52 + 53 + void user_exit(void) 54 + { 55 + unsigned long flags; 56 + 57 + /* 58 + * Some contexts may involve an exception occuring in an irq, 59 + * leading to that nesting: 60 + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() 61 + * This would mess up the dyntick_nesting count though. And rcu_irq_*() 62 + * helpers are enough to protect RCU uses inside the exception. So 63 + * just return immediately if we detect we are in an IRQ. 
64 + */ 65 + if (in_interrupt()) 66 + return; 67 + 68 + local_irq_save(flags); 69 + if (__this_cpu_read(context_tracking.state) == IN_USER) { 70 + __this_cpu_write(context_tracking.state, IN_KERNEL); 71 + rcu_user_exit(); 72 + } 73 + local_irq_restore(flags); 74 + } 75 + 76 + void context_tracking_task_switch(struct task_struct *prev, 77 + struct task_struct *next) 78 + { 79 + if (__this_cpu_read(context_tracking.active)) { 80 + clear_tsk_thread_flag(prev, TIF_NOHZ); 81 + set_tsk_thread_flag(next, TIF_NOHZ); 82 + } 83 + }
+2 -62
kernel/rcutree.c
··· 207 207 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 208 208 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, 209 209 .dynticks = ATOMIC_INIT(1), 210 - #if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE) 211 - .ignore_user_qs = true, 212 - #endif 213 210 }; 214 211 215 212 static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ ··· 417 420 */ 418 421 void rcu_user_enter(void) 419 422 { 420 - unsigned long flags; 421 - struct rcu_dynticks *rdtp; 422 - 423 - /* 424 - * Some contexts may involve an exception occuring in an irq, 425 - * leading to that nesting: 426 - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() 427 - * This would mess up the dyntick_nesting count though. And rcu_irq_*() 428 - * helpers are enough to protect RCU uses inside the exception. So 429 - * just return immediately if we detect we are in an IRQ. 430 - */ 431 - if (in_interrupt()) 432 - return; 433 - 434 - WARN_ON_ONCE(!current->mm); 435 - 436 - local_irq_save(flags); 437 - rdtp = &__get_cpu_var(rcu_dynticks); 438 - if (!rdtp->ignore_user_qs && !rdtp->in_user) { 439 - rdtp->in_user = true; 440 - rcu_eqs_enter(true); 441 - } 442 - local_irq_restore(flags); 423 + rcu_eqs_enter(1); 443 424 } 444 425 445 426 /** ··· 554 579 */ 555 580 void rcu_user_exit(void) 556 581 { 557 - unsigned long flags; 558 - struct rcu_dynticks *rdtp; 559 - 560 - /* 561 - * Some contexts may involve an exception occuring in an irq, 562 - * leading to that nesting: 563 - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() 564 - * This would mess up the dyntick_nesting count though. And rcu_irq_*() 565 - * helpers are enough to protect RCU uses inside the exception. So 566 - * just return immediately if we detect we are in an IRQ. 
567 - */ 568 - if (in_interrupt()) 569 - return; 570 - 571 - local_irq_save(flags); 572 - rdtp = &__get_cpu_var(rcu_dynticks); 573 - if (rdtp->in_user) { 574 - rdtp->in_user = false; 575 - rcu_eqs_exit(true); 576 - } 577 - local_irq_restore(flags); 582 + rcu_eqs_exit(1); 578 583 } 579 584 580 585 /** ··· 676 721 return ret; 677 722 } 678 723 EXPORT_SYMBOL(rcu_is_cpu_idle); 679 - 680 - #ifdef CONFIG_RCU_USER_QS 681 - void rcu_user_hooks_switch(struct task_struct *prev, 682 - struct task_struct *next) 683 - { 684 - struct rcu_dynticks *rdtp; 685 - 686 - /* Interrupts are disabled in context switch */ 687 - rdtp = &__get_cpu_var(rcu_dynticks); 688 - if (!rdtp->ignore_user_qs) { 689 - clear_tsk_thread_flag(prev, TIF_NOHZ); 690 - set_tsk_thread_flag(next, TIF_NOHZ); 691 - } 692 - } 693 - #endif /* #ifdef CONFIG_RCU_USER_QS */ 694 724 695 725 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) 696 726
+6 -5
kernel/sched/core.c
··· 72 72 #include <linux/slab.h> 73 73 #include <linux/init_task.h> 74 74 #include <linux/binfmts.h> 75 + #include <linux/context_tracking.h> 75 76 76 77 #include <asm/switch_to.h> 77 78 #include <asm/tlb.h> ··· 1887 1886 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 1888 1887 #endif 1889 1888 1889 + context_tracking_task_switch(prev, next); 1890 1890 /* Here we just switch the register state and the stack. */ 1891 - rcu_user_hooks_switch(prev, next); 1892 1891 switch_to(prev, next, prev); 1893 1892 1894 1893 barrier(); ··· 2912 2911 } 2913 2912 EXPORT_SYMBOL(schedule); 2914 2913 2915 - #ifdef CONFIG_RCU_USER_QS 2914 + #ifdef CONFIG_CONTEXT_TRACKING 2916 2915 asmlinkage void __sched schedule_user(void) 2917 2916 { 2918 2917 /* ··· 2921 2920 * we haven't yet exited the RCU idle mode. Do it here manually until 2922 2921 * we find a better solution. 2923 2922 */ 2924 - rcu_user_exit(); 2923 + user_exit(); 2925 2924 schedule(); 2926 - rcu_user_enter(); 2925 + user_enter(); 2927 2926 } 2928 2927 #endif 2929 2928 ··· 3028 3027 /* Catch callers which need to be fixed */ 3029 3028 BUG_ON(ti->preempt_count || !irqs_disabled()); 3030 3029 3031 - rcu_user_exit(); 3030 + user_exit(); 3032 3031 do { 3033 3032 add_preempt_count(PREEMPT_ACTIVE); 3034 3033 local_irq_enable();