Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal

Pull third pile of kernel_execve() patches from Al Viro:
"The last bits of infrastructure for kernel_thread() et.al., with
alpha/arm/x86 use of those. Plus sanitizing the asm glue and
do_notify_resume() on alpha, fixing the "disabled irq while running
task_work stuff" breakage there.

At that point the rest of kernel_thread/kernel_execve/sys_execve work
can be done independently for different architectures. The only
pending bits that do depend on having all architectures converted are
restricted to fs/* and kernel/* - that'll obviously have to wait for
the next cycle.

I thought we'd have to wait for all of them done before we start
eliminating the longjump-style insanity in kernel_execve(), but it
turned out there's a very simple way to do that without flagday-style
changes."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal:
alpha: switch to saner kernel_execve() semantics
arm: switch to saner kernel_execve() semantics
x86, um: convert to saner kernel_execve() semantics
infrastructure for saner ret_from_kernel_thread semantics
make sure that kernel_thread() callbacks call do_exit() themselves
make sure that we always have a return path from kernel_execve()
ppc: eeh_event should just use kthread_run()
don't bother with kernel_thread/kernel_execve for launching linuxrc
alpha: get rid of switch_stack argument of do_work_pending()
alpha: don't bother passing switch_stack separately from regs
alpha: take SIGPENDING/NOTIFY_RESUME loop into signal.c
alpha: simplify TIF_NEED_RESCHED handling

+137 -223
+3
arch/Kconfig
··· 274 274 config GENERIC_KERNEL_THREAD 275 275 bool 276 276 277 + config GENERIC_KERNEL_EXECVE 278 + bool 279 + 277 280 config HAVE_ARCH_SECCOMP_FILTER 278 281 bool 279 282 help
+1
arch/alpha/Kconfig
··· 21 21 select GENERIC_STRNCPY_FROM_USER 22 22 select GENERIC_STRNLEN_USER 23 23 select GENERIC_KERNEL_THREAD 24 + select GENERIC_KERNEL_EXECVE 24 25 help 25 26 The Alpha is a 64-bit general-purpose processor designed and 26 27 marketed by the Digital Equipment Corporation of blessed memory,
-1
arch/alpha/include/asm/unistd.h
··· 482 482 #define __ARCH_WANT_SYS_SIGPENDING 483 483 #define __ARCH_WANT_SYS_RT_SIGSUSPEND 484 484 #define __ARCH_WANT_SYS_EXECVE 485 - #define __ARCH_WANT_KERNEL_EXECVE 486 485 487 486 /* "Conditional" syscalls. What we want is 488 487
+28 -59
arch/alpha/kernel/entry.S
··· 311 311 312 312 .align 4 313 313 ret_from_sys_call: 314 - cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ 314 + cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ 315 315 ldq $0, SP_OFF($sp) 316 316 and $0, 8, $0 317 317 beq $0, ret_to_kernel ··· 320 320 sampling and the rti. */ 321 321 lda $16, 7 322 322 call_pal PAL_swpipl 323 - ldl $5, TI_FLAGS($8) 324 - and $5, _TIF_WORK_MASK, $2 323 + ldl $17, TI_FLAGS($8) 324 + and $17, _TIF_WORK_MASK, $2 325 325 bne $2, work_pending 326 326 restore_all: 327 327 RESTORE_ALL ··· 341 341 * frame to indicate that a negative return value wasn't an 342 342 * error number.. 343 343 */ 344 - ldq $19, 0($sp) /* old syscall nr (zero if success) */ 345 - beq $19, $ret_success 344 + ldq $18, 0($sp) /* old syscall nr (zero if success) */ 345 + beq $18, $ret_success 346 346 347 - ldq $20, 72($sp) /* .. and this a3 */ 347 + ldq $19, 72($sp) /* .. and this a3 */ 348 348 subq $31, $0, $0 /* with error in v0 */ 349 349 addq $31, 1, $1 /* set a3 for errno return */ 350 350 stq $0, 0($sp) ··· 362 362 * Do all cleanup when returning from all interrupts and system calls. 363 363 * 364 364 * Arguments: 365 - * $5: TI_FLAGS. 366 365 * $8: current. 367 - * $19: The old syscall number, or zero if this is not a return 366 + * $17: TI_FLAGS. 367 + * $18: The old syscall number, or zero if this is not a return 368 368 * from a syscall that errored and is possibly restartable. 
369 - * $20: The old a3 value 369 + * $19: The old a3 value 370 370 */ 371 371 372 372 .align 4 373 373 .ent work_pending 374 374 work_pending: 375 - and $5, _TIF_NEED_RESCHED, $2 376 - beq $2, $work_notifysig 375 + and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 376 + bne $2, $work_notifysig 377 377 378 378 $work_resched: 379 - subq $sp, 16, $sp 380 - stq $19, 0($sp) /* save syscall nr */ 381 - stq $20, 8($sp) /* and error indication (a3) */ 379 + /* 380 + * We can get here only if we returned from syscall without SIGPENDING 381 + * or got through work_notifysig already. Either case means no syscall 382 + * restarts for us, so let $18 and $19 burn. 383 + */ 382 384 jsr $26, schedule 383 - ldq $19, 0($sp) 384 - ldq $20, 8($sp) 385 - addq $sp, 16, $sp 386 - /* Make sure need_resched and sigpending don't change between 387 - sampling and the rti. */ 388 - lda $16, 7 389 - call_pal PAL_swpipl 390 - ldl $5, TI_FLAGS($8) 391 - and $5, _TIF_WORK_MASK, $2 392 - beq $2, restore_all 393 - and $5, _TIF_NEED_RESCHED, $2 394 - bne $2, $work_resched 385 + mov 0, $18 386 + br ret_to_user 395 387 396 388 $work_notifysig: 397 389 mov $sp, $16 398 390 bsr $1, do_switch_stack 399 - mov $sp, $17 400 - mov $5, $18 401 - mov $19, $9 /* save old syscall number */ 402 - mov $20, $10 /* save old a3 */ 403 - and $5, _TIF_SIGPENDING, $2 404 - cmovne $2, 0, $9 /* we don't want double syscall restarts */ 405 - jsr $26, do_notify_resume 406 - mov $9, $19 407 - mov $10, $20 391 + jsr $26, do_work_pending 408 392 bsr $1, undo_switch_stack 409 - br ret_to_user 393 + br restore_all 410 394 .end work_pending 411 395 412 396 /* ··· 438 454 439 455 .align 3 440 456 $strace_error: 441 - ldq $19, 0($sp) /* old syscall nr (zero if success) */ 442 - beq $19, $strace_success 443 - ldq $20, 72($sp) /* .. and this a3 */ 457 + ldq $18, 0($sp) /* old syscall nr (zero if success) */ 458 + beq $18, $strace_success 459 + ldq $19, 72($sp) /* .. 
and this a3 */ 444 460 445 461 subq $31, $0, $0 /* with error in v0 */ 446 462 addq $31, 1, $1 /* set a3 for errno return */ ··· 448 464 stq $1, 72($sp) /* a3 for return */ 449 465 450 466 bsr $1, do_switch_stack 451 - mov $19, $9 /* save old syscall number */ 452 - mov $20, $10 /* save old a3 */ 467 + mov $18, $9 /* save old syscall number */ 468 + mov $19, $10 /* save old a3 */ 453 469 jsr $26, syscall_trace_leave 454 - mov $9, $19 455 - mov $10, $20 470 + mov $9, $18 471 + mov $10, $19 456 472 bsr $1, undo_switch_stack 457 473 458 474 mov $31, $26 /* tell "ret_from_sys_call" we can restart */ ··· 603 619 mov $9, $27 604 620 mov $10, $16 605 621 jsr $26, ($9) 606 - ldgp $gp, 0($26) 607 - mov $0, $16 608 - mov $31, $26 609 - jmp $31, sys_exit 610 - .end ret_from_kernel_thread 611 - 612 - .globl ret_from_kernel_execve 613 - .align 4 614 - .ent ret_from_kernel_execve 615 - ret_from_kernel_execve: 616 - mov $16, $sp 617 - /* Avoid the HAE being gratuitously wrong, to avoid restoring it. */ 618 - ldq $2, alpha_mv+HAE_CACHE 619 - stq $2, 152($sp) /* HAE */ 620 622 mov $31, $19 /* to disable syscall restarts */ 621 623 br $31, ret_to_user 622 - 623 - .end ret_from_kernel_execve 624 + .end ret_from_kernel_thread 624 625 625 626 626 627 /*
+28 -20
arch/alpha/kernel/signal.c
··· 298 298 299 299 static long 300 300 setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, 301 - struct switch_stack *sw, unsigned long mask, unsigned long sp) 301 + unsigned long mask, unsigned long sp) 302 302 { 303 + struct switch_stack *sw = (struct switch_stack *)regs - 1; 303 304 long i, err = 0; 304 305 305 306 err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); ··· 355 354 356 355 static int 357 356 setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, 358 - struct pt_regs *regs, struct switch_stack * sw) 357 + struct pt_regs *regs) 359 358 { 360 359 unsigned long oldsp, r26, err = 0; 361 360 struct sigframe __user *frame; ··· 365 364 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 366 365 return -EFAULT; 367 366 368 - err |= setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); 367 + err |= setup_sigcontext(&frame->sc, regs, set->sig[0], oldsp); 369 368 if (err) 370 369 return -EFAULT; 371 370 ··· 402 401 403 402 static int 404 403 setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 405 - sigset_t *set, struct pt_regs *regs, struct switch_stack * sw) 404 + sigset_t *set, struct pt_regs *regs) 406 405 { 407 406 unsigned long oldsp, r26, err = 0; 408 407 struct rt_sigframe __user *frame; ··· 421 420 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 422 421 err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags); 423 422 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 424 - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, 423 + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, 425 424 set->sig[0], oldsp); 426 425 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 427 426 if (err) ··· 465 464 */ 466 465 static inline void 467 466 handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, 468 - struct pt_regs * regs, struct switch_stack *sw) 467 + struct pt_regs * regs) 469 468 { 470 469 sigset_t *oldset = 
sigmask_to_save(); 471 470 int ret; 472 471 473 472 if (ka->sa.sa_flags & SA_SIGINFO) 474 - ret = setup_rt_frame(sig, ka, info, oldset, regs, sw); 473 + ret = setup_rt_frame(sig, ka, info, oldset, regs); 475 474 else 476 - ret = setup_frame(sig, ka, oldset, regs, sw); 475 + ret = setup_frame(sig, ka, oldset, regs); 477 476 478 477 if (ret) { 479 478 force_sigsegv(sig, current); ··· 520 519 * all (if we get here from anything but a syscall return, it will be 0) 521 520 */ 522 521 static void 523 - do_signal(struct pt_regs * regs, struct switch_stack * sw, 524 - unsigned long r0, unsigned long r19) 522 + do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) 525 523 { 526 524 siginfo_t info; 527 525 int signr; ··· 537 537 /* Whee! Actually deliver the signal. */ 538 538 if (r0) 539 539 syscall_restart(r0, r19, regs, &ka); 540 - handle_signal(signr, &ka, &info, regs, sw); 540 + handle_signal(signr, &ka, &info, regs); 541 541 if (single_stepping) 542 542 ptrace_set_bpt(current); /* re-set bpt */ 543 543 return; ··· 568 568 } 569 569 570 570 void 571 - do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, 572 - unsigned long thread_info_flags, 571 + do_work_pending(struct pt_regs *regs, unsigned long thread_flags, 573 572 unsigned long r0, unsigned long r19) 574 573 { 575 - if (thread_info_flags & _TIF_SIGPENDING) 576 - do_signal(regs, sw, r0, r19); 577 - 578 - if (thread_info_flags & _TIF_NOTIFY_RESUME) { 579 - clear_thread_flag(TIF_NOTIFY_RESUME); 580 - tracehook_notify_resume(regs); 581 - } 574 + do { 575 + if (thread_flags & _TIF_NEED_RESCHED) { 576 + schedule(); 577 + } else { 578 + local_irq_enable(); 579 + if (thread_flags & _TIF_SIGPENDING) { 580 + do_signal(regs, r0, r19); 581 + r0 = 0; 582 + } else { 583 + clear_thread_flag(TIF_NOTIFY_RESUME); 584 + tracehook_notify_resume(regs); 585 + } 586 + } 587 + local_irq_disable(); 588 + thread_flags = current_thread_info()->flags; 589 + } while (thread_flags & _TIF_WORK_MASK); 582 590 }
+1
arch/arm/Kconfig
··· 53 53 select GENERIC_STRNLEN_USER 54 54 select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN 55 55 select GENERIC_KERNEL_THREAD 56 + select GENERIC_KERNEL_EXECVE 56 57 help 57 58 The ARM series is a line of low-power-consumption RISC chip designs 58 59 licensed by ARM Ltd and targeted at embedded applications and
-1
arch/arm/include/asm/unistd.h
··· 479 479 #define __ARCH_WANT_SYS_SOCKETCALL 480 480 #endif 481 481 #define __ARCH_WANT_SYS_EXECVE 482 - #define __ARCH_WANT_KERNEL_EXECVE 483 482 484 483 /* 485 484 * "Conditional" syscalls
+4 -25
arch/arm/kernel/entry-common.S
··· 86 86 */ 87 87 ENTRY(ret_from_fork) 88 88 bl schedule_tail 89 + cmp r5, #0 90 + movne r0, r4 91 + movne lr, pc 92 + movne pc, r5 89 93 get_thread_info tsk 90 - mov why, #1 91 94 b ret_slow_syscall 92 95 ENDPROC(ret_from_fork) 93 - 94 - ENTRY(ret_from_kernel_thread) 95 - UNWIND(.fnstart) 96 - UNWIND(.cantunwind) 97 - bl schedule_tail 98 - mov r0, r4 99 - adr lr, BSYM(1f) @ kernel threads should not exit 100 - mov pc, r5 101 - 1: bl do_exit 102 - nop 103 - UNWIND(.fnend) 104 - ENDPROC(ret_from_kernel_thread) 105 - 106 - /* 107 - * turn a kernel thread into userland process 108 - * use: ret_from_kernel_execve(struct pt_regs *normal) 109 - */ 110 - ENTRY(ret_from_kernel_execve) 111 - mov why, #0 @ not a syscall 112 - str why, [r0, #S_R0] @ ... and we want 0 in ->ARM_r0 as well 113 - get_thread_info tsk @ thread structure 114 - mov sp, r0 @ stack pointer just under pt_regs 115 - b ret_slow_syscall 116 - ENDPROC(ret_from_kernel_execve) 117 96 118 97 .equ NR_syscalls,0 119 98 #define CALL(x) .equ NR_syscalls,NR_syscalls+1
+2 -3
arch/arm/kernel/process.c
··· 373 373 } 374 374 375 375 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 376 - asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); 377 376 378 377 int 379 378 copy_thread(unsigned long clone_flags, unsigned long stack_start, ··· 387 388 *childregs = *regs; 388 389 childregs->ARM_r0 = 0; 389 390 childregs->ARM_sp = stack_start; 390 - thread->cpu_context.pc = (unsigned long)ret_from_fork; 391 391 } else { 392 + memset(childregs, 0, sizeof(struct pt_regs)); 392 393 thread->cpu_context.r4 = stk_sz; 393 394 thread->cpu_context.r5 = stack_start; 394 - thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread; 395 395 childregs->ARM_cpsr = SVC_MODE; 396 396 } 397 + thread->cpu_context.pc = (unsigned long)ret_from_fork; 397 398 thread->cpu_context.sp = (unsigned long)childregs; 398 399 399 400 clear_ptrace_hw_breakpoint(p);
+2 -3
arch/powerpc/platforms/pseries/eeh_event.c
··· 23 23 #include <linux/pci.h> 24 24 #include <linux/slab.h> 25 25 #include <linux/workqueue.h> 26 + #include <linux/kthread.h> 26 27 #include <asm/eeh_event.h> 27 28 #include <asm/ppc-pci.h> 28 29 ··· 59 58 unsigned long flags; 60 59 struct eeh_event *event; 61 60 struct eeh_pe *pe; 62 - 63 - set_task_comm(current, "eehd"); 64 61 65 62 spin_lock_irqsave(&eeh_eventlist_lock, flags); 66 63 event = NULL; ··· 107 108 */ 108 109 static void eeh_thread_launcher(struct work_struct *dummy) 109 110 { 110 - if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) 111 + if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) 111 112 printk(KERN_ERR "Failed to start EEH daemon\n"); 112 113 } 113 114
-2
arch/um/include/asm/processor-generic.h
··· 26 26 jmp_buf *fault_catcher; 27 27 struct task_struct *prev_sched; 28 28 unsigned long temp_stack; 29 - jmp_buf *exec_buf; 30 29 struct arch_thread arch; 31 30 jmp_buf switch_buf; 32 31 int mm_count; ··· 53 54 .fault_addr = NULL, \ 54 55 .prev_sched = NULL, \ 55 56 .temp_stack = 0, \ 56 - .exec_buf = NULL, \ 57 57 .arch = INIT_ARCH_THREAD, \ 58 58 .request = { 0 } \ 59 59 }
-1
arch/um/include/shared/os.h
··· 191 191 extern int os_getpgrp(void); 192 192 193 193 extern void init_new_thread_signals(void); 194 - extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr); 195 194 196 195 extern int os_map_memory(void *virt, int fd, unsigned long long off, 197 196 unsigned long len, int r, int w, int x);
-5
arch/um/kernel/exec.c
··· 47 47 #endif 48 48 } 49 49 EXPORT_SYMBOL(start_thread); 50 - 51 - void __noreturn ret_from_kernel_execve(struct pt_regs *unused) 52 - { 53 - UML_LONGJMP(current->thread.exec_buf, 1); 54 - }
+3 -7
arch/um/kernel/process.c
··· 135 135 arg = current->thread.request.u.thread.arg; 136 136 137 137 /* 138 - * The return value is 1 if the kernel thread execs a process, 139 - * 0 if it just exits 138 + * callback returns only if the kernel thread execs a process 140 139 */ 141 - n = run_kernel_thread(fn, arg, &current->thread.exec_buf); 142 - if (n == 1) 143 - userspace(&current->thread.regs.regs); 144 - else 145 - do_exit(0); 140 + n = fn(arg); 141 + userspace(&current->thread.regs.regs); 146 142 } 147 143 148 144 /* Called magically, see new_thread_handler above */
-13
arch/um/os-Linux/process.c
··· 244 244 signal(SIGWINCH, SIG_IGN); 245 245 signal(SIGTERM, SIG_DFL); 246 246 } 247 - 248 - int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr) 249 - { 250 - jmp_buf buf; 251 - int n; 252 - 253 - *jmp_ptr = &buf; 254 - n = UML_SETJMP(&buf); 255 - if (n != 0) 256 - return n; 257 - (*fn)(arg); 258 - return 0; 259 - }
+1
arch/x86/Kconfig
··· 109 109 select HAVE_RCU_USER_QS if X86_64 110 110 select HAVE_IRQ_TIME_ACCOUNTING 111 111 select GENERIC_KERNEL_THREAD 112 + select GENERIC_KERNEL_EXECVE 112 113 113 114 config INSTRUCTION_DECODER 114 115 def_bool y
-1
arch/x86/include/asm/unistd.h
··· 51 51 # define __ARCH_WANT_SYS_UTIME 52 52 # define __ARCH_WANT_SYS_WAITPID 53 53 # define __ARCH_WANT_SYS_EXECVE 54 - # define __ARCH_WANT_KERNEL_EXECVE 55 54 56 55 /* 57 56 * "Conditional" syscalls
+12 -19
arch/x86/kernel/entry_32.S
··· 299 299 CFI_ENDPROC 300 300 END(ret_from_fork) 301 301 302 - ENTRY(ret_from_kernel_execve) 303 - movl %eax, %esp 304 - movl $0,PT_EAX(%esp) 302 + ENTRY(ret_from_kernel_thread) 303 + CFI_STARTPROC 304 + pushl_cfi %eax 305 + call schedule_tail 305 306 GET_THREAD_INFO(%ebp) 307 + popl_cfi %eax 308 + pushl_cfi $0x0202 # Reset kernel eflags 309 + popfl_cfi 310 + movl PT_EBP(%esp),%eax 311 + call *PT_EBX(%esp) 312 + movl $0,PT_EAX(%esp) 306 313 jmp syscall_exit 307 - END(ret_from_kernel_execve) 314 + CFI_ENDPROC 315 + ENDPROC(ret_from_kernel_thread) 308 316 309 317 /* 310 318 * Interrupt exit functions should be protected against kprobes ··· 1022 1014 * End of kprobes section 1023 1015 */ 1024 1016 .popsection 1025 - 1026 - ENTRY(ret_from_kernel_thread) 1027 - CFI_STARTPROC 1028 - pushl_cfi %eax 1029 - call schedule_tail 1030 - GET_THREAD_INFO(%ebp) 1031 - popl_cfi %eax 1032 - pushl_cfi $0x0202 # Reset kernel eflags 1033 - popfl_cfi 1034 - movl PT_EBP(%esp),%eax 1035 - call *PT_EBX(%esp) 1036 - call do_exit 1037 - ud2 # padding for call trace 1038 - CFI_ENDPROC 1039 - ENDPROC(ret_from_kernel_thread) 1040 1017 1041 1018 #ifdef CONFIG_XEN 1042 1019 /* Xen doesn't set %esp to be precisely what the normal sysenter
+4 -20
arch/x86/kernel/entry_64.S
··· 563 563 jmp ret_from_sys_call # go to the SYSRET fastpath 564 564 565 565 1: 566 - subq $REST_SKIP, %rsp # move the stack pointer back 566 + subq $REST_SKIP, %rsp # leave space for volatiles 567 567 CFI_ADJUST_CFA_OFFSET REST_SKIP 568 568 movq %rbp, %rdi 569 569 call *%rbx 570 - # exit 571 - mov %eax, %edi 572 - call do_exit 573 - ud2 # padding for call trace 574 - 570 + movl $0, RAX(%rsp) 571 + RESTORE_REST 572 + jmp int_ret_from_sys_call 575 573 CFI_ENDPROC 576 574 END(ret_from_fork) 577 575 ··· 1323 1325 movl %eax,%gs 1324 1326 jmp 2b 1325 1327 .previous 1326 - 1327 - ENTRY(ret_from_kernel_execve) 1328 - movq %rdi, %rsp 1329 - movl $0, RAX(%rsp) 1330 - // RESTORE_REST 1331 - movq 0*8(%rsp), %r15 1332 - movq 1*8(%rsp), %r14 1333 - movq 2*8(%rsp), %r13 1334 - movq 3*8(%rsp), %r12 1335 - movq 4*8(%rsp), %rbp 1336 - movq 5*8(%rsp), %rbx 1337 - addq $(6*8), %rsp 1338 - jmp int_ret_from_sys_call 1339 - END(ret_from_kernel_execve) 1340 1328 1341 1329 /* Call softirq on interrupt stack. Interrupts are off. */ 1342 1330 ENTRY(call_softirq)
+1
arch/x86/um/Kconfig
··· 14 14 def_bool y 15 15 select GENERIC_FIND_FIRST_BIT 16 16 select GENERIC_KERNEL_THREAD 17 + select GENERIC_KERNEL_EXECVE 17 18 18 19 config 64BIT 19 20 bool "64-bit kernel" if SUBARCH = "x86"
+8
include/linux/syscalls.h
··· 827 827 const char __user *pathname); 828 828 asmlinkage long sys_syncfs(int fd); 829 829 830 + #ifndef CONFIG_GENERIC_KERNEL_EXECVE 830 831 int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); 832 + #else 833 + #define kernel_execve(filename, argv, envp) \ 834 + do_execve(filename, \ 835 + (const char __user *const __user *)argv, \ 836 + (const char __user *const __user *)envp, \ 837 + current_pt_regs()) 838 + #endif 831 839 832 840 833 841 asmlinkage long sys_perf_event_open(
+16 -25
init/do_mounts_initrd.c
··· 16 16 #include <linux/initrd.h> 17 17 #include <linux/sched.h> 18 18 #include <linux/freezer.h> 19 + #include <linux/kmod.h> 19 20 20 21 #include "do_mounts.h" 21 22 22 23 unsigned long initrd_start, initrd_end; 23 24 int initrd_below_start_ok; 24 25 unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ 25 - static int __initdata old_fd, root_fd; 26 26 static int __initdata mount_initrd = 1; 27 27 28 28 static int __init no_initrd(char *str) ··· 33 33 34 34 __setup("noinitrd", no_initrd); 35 35 36 - static int __init do_linuxrc(void *_shell) 36 + static int init_linuxrc(struct subprocess_info *info, struct cred *new) 37 37 { 38 - static const char *argv[] = { "linuxrc", NULL, }; 39 - extern const char *envp_init[]; 40 - const char *shell = _shell; 41 - 42 - sys_close(old_fd);sys_close(root_fd); 38 + sys_unshare(CLONE_FS | CLONE_FILES); 39 + /* move initrd over / and chdir/chroot in initrd root */ 40 + sys_chdir("/root"); 41 + sys_mount(".", "/", NULL, MS_MOVE, NULL); 42 + sys_chroot("."); 43 43 sys_setsid(); 44 - return kernel_execve(shell, argv, envp_init); 44 + return 0; 45 45 } 46 46 47 47 static void __init handle_initrd(void) 48 48 { 49 + static char *argv[] = { "linuxrc", NULL, }; 50 + extern char *envp_init[]; 49 51 int error; 50 - int pid; 51 52 52 53 real_root_dev = new_encode_dev(ROOT_DEV); 53 54 create_dev("/dev/root.old", Root_RAM0); 54 55 /* mount initrd on rootfs' /root */ 55 56 mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); 56 57 sys_mkdir("/old", 0700); 57 - root_fd = sys_open("/", 0, 0); 58 - old_fd = sys_open("/old", 0, 0); 59 - /* move initrd over / and chdir/chroot in initrd root */ 60 - sys_chdir("/root"); 61 - sys_mount(".", "/", NULL, MS_MOVE, NULL); 62 - sys_chroot("."); 58 + sys_chdir("/old"); 63 59 64 60 /* 65 61 * In case that a resume from disk is carried out by linuxrc or one of ··· 63 67 */ 64 68 current->flags |= PF_FREEZER_SKIP; 65 69 66 - pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); 
67 - if (pid > 0) 68 - while (pid != sys_wait4(-1, NULL, 0, NULL)) 69 - yield(); 70 + call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC, 71 + init_linuxrc, NULL, NULL); 70 72 71 73 current->flags &= ~PF_FREEZER_SKIP; 72 74 73 75 /* move initrd to rootfs' /old */ 74 - sys_fchdir(old_fd); 75 - sys_mount("/", ".", NULL, MS_MOVE, NULL); 76 + sys_mount("..", ".", NULL, MS_MOVE, NULL); 76 77 /* switch root and cwd back to / of rootfs */ 77 - sys_fchdir(root_fd); 78 - sys_chroot("."); 79 - sys_close(old_fd); 80 - sys_close(root_fd); 78 + sys_chroot(".."); 81 79 82 80 if (new_decode_dev(real_root_dev) == Root_RAM0) { 83 81 sys_chdir("/old"); 84 82 return; 85 83 } 86 84 85 + sys_chdir("/"); 87 86 ROOT_DEV = new_decode_dev(real_root_dev); 88 87 mount_root(); 89 88
+17 -16
init/main.c
··· 69 69 #include <linux/slab.h> 70 70 #include <linux/perf_event.h> 71 71 #include <linux/file.h> 72 + #include <linux/ptrace.h> 72 73 73 74 #include <asm/io.h> 74 75 #include <asm/bugs.h> ··· 792 791 do_one_initcall(*fn); 793 792 } 794 793 795 - static void run_init_process(const char *init_filename) 794 + static int run_init_process(const char *init_filename) 796 795 { 797 796 argv_init[0] = init_filename; 798 - kernel_execve(init_filename, argv_init, envp_init); 797 + return kernel_execve(init_filename, argv_init, envp_init); 799 798 } 800 799 801 - /* This is a non __init function. Force it to be noinline otherwise gcc 802 - * makes it inline to init() and it becomes part of init.text section 803 - */ 804 - static noinline int init_post(void) 800 + static void __init kernel_init_freeable(void); 801 + 802 + static int __ref kernel_init(void *unused) 805 803 { 804 + kernel_init_freeable(); 806 805 /* need to finish all async __init code before freeing the memory */ 807 806 async_synchronize_full(); 808 807 free_initmem(); ··· 814 813 flush_delayed_fput(); 815 814 816 815 if (ramdisk_execute_command) { 817 - run_init_process(ramdisk_execute_command); 816 + if (!run_init_process(ramdisk_execute_command)) 817 + return 0; 818 818 printk(KERN_WARNING "Failed to execute %s\n", 819 819 ramdisk_execute_command); 820 820 } ··· 827 825 * trying to recover a really broken machine. 828 826 */ 829 827 if (execute_command) { 830 - run_init_process(execute_command); 828 + if (!run_init_process(execute_command)) 829 + return 0; 831 830 printk(KERN_WARNING "Failed to execute %s. 
Attempting " 832 831 "defaults...\n", execute_command); 833 832 } 834 - run_init_process("/sbin/init"); 835 - run_init_process("/etc/init"); 836 - run_init_process("/bin/init"); 837 - run_init_process("/bin/sh"); 833 + if (!run_init_process("/sbin/init") || 834 + !run_init_process("/etc/init") || 835 + !run_init_process("/bin/init") || 836 + !run_init_process("/bin/sh")) 837 + return 0; 838 838 839 839 panic("No init found. Try passing init= option to kernel. " 840 840 "See Linux Documentation/init.txt for guidance."); 841 841 } 842 842 843 - static int __init kernel_init(void * unused) 843 + static void __init kernel_init_freeable(void) 844 844 { 845 845 /* 846 846 * Wait until kthreadd is all set-up. ··· 897 893 * we're essentially up and running. Get rid of the 898 894 * initmem segments and start the user-mode stuff.. 899 895 */ 900 - 901 - init_post(); 902 - return 0; 903 896 }
+5 -2
kernel/kmod.c
··· 37 37 #include <linux/notifier.h> 38 38 #include <linux/suspend.h> 39 39 #include <linux/rwsem.h> 40 + #include <linux/ptrace.h> 40 41 #include <asm/uaccess.h> 41 42 42 43 #include <trace/events/module.h> ··· 222 221 retval = kernel_execve(sub_info->path, 223 222 (const char *const *)sub_info->argv, 224 223 (const char *const *)sub_info->envp); 224 + if (!retval) 225 + return 0; 225 226 226 227 /* Exec failed? */ 227 228 fail: 228 229 sub_info->retval = retval; 229 - return 0; 230 + do_exit(0); 230 231 } 231 232 232 233 static int call_helper(void *data) ··· 295 292 } 296 293 297 294 umh_complete(sub_info); 298 - return 0; 295 + do_exit(0); 299 296 } 300 297 301 298 /* This is run by khelper thread */
+1
kernel/kthread.c
··· 16 16 #include <linux/mutex.h> 17 17 #include <linux/slab.h> 18 18 #include <linux/freezer.h> 19 + #include <linux/ptrace.h> 19 20 #include <trace/events/sched.h> 20 21 21 22 static DEFINE_SPINLOCK(kthread_create_lock);