Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

make seccomp zerocost in schedule

This follows a suggestion from Chuck Ebbert on how to make seccomp
absolutely zerocost in schedule too. The only remaining footprint of
seccomp is the bzImage size, which becomes a few bytes (perhaps even a
few kbytes) larger; measure it if you care about that on embedded systems.

Signed-off-by: Andrea Arcangeli <andrea@cpushare.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Andrea Arcangeli and committed by
Linus Torvalds
cf99abac 1d9d02fe

+50 -45
+39 -34
arch/i386/kernel/process.c
··· 538 538 return 1; 539 539 } 540 540 541 - static noinline void __switch_to_xtra(struct task_struct *next_p, 542 - struct tss_struct *tss) 541 + #ifdef CONFIG_SECCOMP 542 + void hard_disable_TSC(void) 543 + { 544 + write_cr4(read_cr4() | X86_CR4_TSD); 545 + } 546 + void disable_TSC(void) 547 + { 548 + preempt_disable(); 549 + if (!test_and_set_thread_flag(TIF_NOTSC)) 550 + /* 551 + * Must flip the CPU state synchronously with 552 + * TIF_NOTSC in the current running context. 553 + */ 554 + hard_disable_TSC(); 555 + preempt_enable(); 556 + } 557 + void hard_enable_TSC(void) 558 + { 559 + write_cr4(read_cr4() & ~X86_CR4_TSD); 560 + } 561 + #endif /* CONFIG_SECCOMP */ 562 + 563 + static noinline void 564 + __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 565 + struct tss_struct *tss) 543 566 { 544 567 struct thread_struct *next; 545 568 ··· 577 554 set_debugreg(next->debugreg[6], 6); 578 555 set_debugreg(next->debugreg[7], 7); 579 556 } 557 + 558 + #ifdef CONFIG_SECCOMP 559 + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 560 + test_tsk_thread_flag(next_p, TIF_NOTSC)) { 561 + /* prev and next are different */ 562 + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) 563 + hard_disable_TSC(); 564 + else 565 + hard_enable_TSC(); 566 + } 567 + #endif 580 568 581 569 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 582 570 /* ··· 617 583 * perform any I/O during its timeslice. 618 584 */ 619 585 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; 620 - } 621 - 622 - /* 623 - * This function selects if the context switch from prev to next 624 - * has to tweak the TSC disable bit in the cr4. 625 - */ 626 - static inline void disable_tsc(struct task_struct *prev_p, 627 - struct task_struct *next_p) 628 - { 629 - struct thread_info *prev, *next; 630 - 631 - /* 632 - * gcc should eliminate the ->thread_info dereference if 633 - * has_secure_computing returns 0 at compile time (SECCOMP=n). 
634 - */ 635 - prev = task_thread_info(prev_p); 636 - next = task_thread_info(next_p); 637 - 638 - if (has_secure_computing(prev) || has_secure_computing(next)) { 639 - /* slow path here */ 640 - if (has_secure_computing(prev) && 641 - !has_secure_computing(next)) { 642 - write_cr4(read_cr4() & ~X86_CR4_TSD); 643 - } else if (!has_secure_computing(prev) && 644 - has_secure_computing(next)) 645 - write_cr4(read_cr4() | X86_CR4_TSD); 646 - } 647 586 } 648 587 649 588 /* ··· 696 689 /* 697 690 * Now maybe handle debug registers and/or IO bitmaps 698 691 */ 699 - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) 700 - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) 701 - __switch_to_xtra(next_p, tss); 702 - 703 - disable_tsc(prev_p, next_p); 692 + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || 693 + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) 694 + __switch_to_xtra(prev_p, next_p, tss); 704 695 705 696 /* 706 697 * Leave lazy mode, flushing any hypercalls made here.
+4
include/asm-i386/processor.h
··· 228 228 229 229 #define HAVE_ARCH_PICK_MMAP_LAYOUT 230 230 231 + extern void hard_disable_TSC(void); 232 + extern void disable_TSC(void); 233 + extern void hard_enable_TSC(void); 234 + 231 235 /* 232 236 * Size of io_bitmap. 233 237 */
+4 -1
include/asm-i386/thread_info.h
··· 137 137 #define TIF_DEBUG 17 /* uses debug registers */ 138 138 #define TIF_IO_BITMAP 18 /* uses I/O bitmap */ 139 139 #define TIF_FREEZE 19 /* is freezing for suspend */ 140 + #define TIF_NOTSC 20 /* TSC is not accessible in userland */ 140 141 141 142 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 142 143 #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) ··· 152 151 #define _TIF_DEBUG (1<<TIF_DEBUG) 153 152 #define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP) 154 153 #define _TIF_FREEZE (1<<TIF_FREEZE) 154 + #define _TIF_NOTSC (1<<TIF_NOTSC) 155 155 156 156 /* work to do on interrupt/exception return */ 157 157 #define _TIF_WORK_MASK \ ··· 162 160 #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) 163 161 164 162 /* flags to check in __switch_to() */ 165 - #define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP) 163 + #define _TIF_WORK_CTXSW_NEXT (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUG) 164 + #define _TIF_WORK_CTXSW_PREV (_TIF_IO_BITMAP | _TIF_NOTSC) 166 165 167 166 /* 168 167 * Thread-synchronous status.
-10
include/linux/seccomp.h
··· 16 16 __secure_computing(this_syscall); 17 17 } 18 18 19 - static inline int has_secure_computing(struct thread_info *ti) 20 - { 21 - return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); 22 - } 23 - 24 19 extern long prctl_get_seccomp(void); 25 20 extern long prctl_set_seccomp(unsigned long); 26 21 ··· 24 29 typedef struct { } seccomp_t; 25 30 26 31 #define secure_computing(x) do { } while (0) 27 - /* static inline to preserve typechecking */ 28 - static inline int has_secure_computing(struct thread_info *ti) 29 - { 30 - return 0; 31 - } 32 32 33 33 static inline long prctl_get_seccomp(void) 34 34 {
+3
kernel/seccomp.c
··· 74 74 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 75 75 current->seccomp.mode = seccomp_mode; 76 76 set_thread_flag(TIF_SECCOMP); 77 + #ifdef TIF_NOTSC 78 + disable_TSC(); 79 + #endif 77 80 ret = 0; 78 81 } 79 82