1/* 2 * linux/fs/exec.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7/* 8 * #!-checking implemented by tytso. 9 */ 10/* 11 * Demand-loading implemented 01.12.91 - no need to read anything but 12 * the header into memory. The inode of the executable is put into 13 * "current->executable", and page faults do the actual loading. Clean. 14 * 15 * Once more I can proudly say that linux stood up to being changed: it 16 * was less than 2 hours work to get demand-loading completely implemented. 17 * 18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, 19 * current->executable is only used by the procfs. This allows a dispatch 20 * table to check for several different types of binary formats. We keep 21 * trying until we recognize the file or we run out of supported binary 22 * formats. 23 */ 24 25#include <linux/config.h> 26#include <linux/slab.h> 27#include <linux/file.h> 28#include <linux/mman.h> 29#include <linux/a.out.h> 30#include <linux/stat.h> 31#include <linux/fcntl.h> 32#include <linux/smp_lock.h> 33#include <linux/init.h> 34#include <linux/pagemap.h> 35#include <linux/highmem.h> 36#include <linux/spinlock.h> 37#include <linux/key.h> 38#include <linux/personality.h> 39#include <linux/binfmts.h> 40#include <linux/swap.h> 41#include <linux/utsname.h> 42#include <linux/module.h> 43#include <linux/namei.h> 44#include <linux/proc_fs.h> 45#include <linux/ptrace.h> 46#include <linux/mount.h> 47#include <linux/security.h> 48#include <linux/syscalls.h> 49#include <linux/rmap.h> 50#include <linux/acct.h> 51#include <linux/cn_proc.h> 52 53#include <asm/uaccess.h> 54#include <asm/mmu_context.h> 55 56#ifdef CONFIG_KMOD 57#include <linux/kmod.h> 58#endif 59 60int core_uses_pid; 61char core_pattern[65] = "core"; 62int suid_dumpable = 0; 63 64EXPORT_SYMBOL(suid_dumpable); 65/* The maximal length of core_pattern is also specified in sysctl.c */ 66 67static struct linux_binfmt *formats; 68static DEFINE_RWLOCK(binfmt_lock); 69 70int register_binfmt(struct linux_binfmt * fmt) 71{ 72 struct linux_binfmt ** tmp = &formats; 73 74 if (!fmt) 75 return -EINVAL; 76 if (fmt->next) 77 return -EBUSY; 78 write_lock(&binfmt_lock); 79 while (*tmp) { 80 if (fmt == *tmp) { 81 write_unlock(&binfmt_lock); 82 return -EBUSY; 83 } 84 tmp = &(*tmp)->next; 85 } 86 fmt->next = formats; 87 formats = fmt; 88 write_unlock(&binfmt_lock); 89 return 0; 90} 91 92EXPORT_SYMBOL(register_binfmt); 93 94int unregister_binfmt(struct linux_binfmt * fmt) 95{ 96 struct linux_binfmt ** tmp = &formats; 97 98 write_lock(&binfmt_lock); 99 while (*tmp) { 100 if (fmt == *tmp) { 101 *tmp = fmt->next; 102 write_unlock(&binfmt_lock); 103 return 0; 104 } 105 tmp = &(*tmp)->next; 106 } 107 write_unlock(&binfmt_lock); 108 return -EINVAL; 109} 110 111EXPORT_SYMBOL(unregister_binfmt); 112 113static inline void put_binfmt(struct linux_binfmt * fmt) 114{ 115 module_put(fmt->module); 116} 117 118/* 119 * Note that a shared library must be both readable and executable due to 120 * security reasons. 121 * 122 * Also note that we take the address to load from from the file itself. 123 */ 124asmlinkage long sys_uselib(const char __user * library) 125{ 126 struct file * file; 127 struct nameidata nd; 128 int error; 129 130 error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); 131 if (error) 132 goto out; 133 134 error = -EINVAL; 135 if (!S_ISREG(nd.dentry->d_inode->i_mode)) 136 goto exit; 137 138 error = vfs_permission(&nd, MAY_READ | MAY_EXEC); 139 if (error) 140 goto exit; 141 142 file = nameidata_to_filp(&nd, O_RDONLY); 143 error = PTR_ERR(file); 144 if (IS_ERR(file)) 145 goto out; 146 147 error = -ENOEXEC; 148 if(file->f_op) { 149 struct linux_binfmt * fmt; 150 151 read_lock(&binfmt_lock); 152 for (fmt = formats ; fmt ; fmt = fmt->next) { 153 if (!fmt->load_shlib) 154 continue; 155 if (!try_module_get(fmt->module)) 156 continue; 157 read_unlock(&binfmt_lock); 158 error = fmt->load_shlib(file); 159 read_lock(&binfmt_lock); 160 put_binfmt(fmt); 161 if (error != -ENOEXEC) 162 break; 163 } 164 read_unlock(&binfmt_lock); 165 } 166 fput(file); 167out: 168 return error; 169exit: 170 release_open_intent(&nd); 171 path_release(&nd); 172 goto out; 173} 174 175/* 176 * count() counts the number of strings in array ARGV. 177 */ 178static int count(char __user * __user * argv, int max) 179{ 180 int i = 0; 181 182 if (argv != NULL) { 183 for (;;) { 184 char __user * p; 185 186 if (get_user(p, argv)) 187 return -EFAULT; 188 if (!p) 189 break; 190 argv++; 191 if(++i > max) 192 return -E2BIG; 193 cond_resched(); 194 } 195 } 196 return i; 197} 198 199/* 200 * 'copy_strings()' copies argument/environment strings from user 201 * memory to free pages in kernel mem. These are in a format ready 202 * to be put directly into the top of new user memory. 203 */ 204static int copy_strings(int argc, char __user * __user * argv, 205 struct linux_binprm *bprm) 206{ 207 struct page *kmapped_page = NULL; 208 char *kaddr = NULL; 209 int ret; 210 211 while (argc-- > 0) { 212 char __user *str; 213 int len; 214 unsigned long pos; 215 216 if (get_user(str, argv+argc) || 217 !(len = strnlen_user(str, bprm->p))) { 218 ret = -EFAULT; 219 goto out; 220 } 221 222 if (bprm->p < len) { 223 ret = -E2BIG; 224 goto out; 225 } 226 227 bprm->p -= len; 228 /* XXX: add architecture specific overflow check here. */ 229 pos = bprm->p; 230 231 while (len > 0) { 232 int i, new, err; 233 int offset, bytes_to_copy; 234 struct page *page; 235 236 offset = pos % PAGE_SIZE; 237 i = pos/PAGE_SIZE; 238 page = bprm->page[i]; 239 new = 0; 240 if (!page) { 241 page = alloc_page(GFP_HIGHUSER); 242 bprm->page[i] = page; 243 if (!page) { 244 ret = -ENOMEM; 245 goto out; 246 } 247 new = 1; 248 } 249 250 if (page != kmapped_page) { 251 if (kmapped_page) 252 kunmap(kmapped_page); 253 kmapped_page = page; 254 kaddr = kmap(kmapped_page); 255 } 256 if (new && offset) 257 memset(kaddr, 0, offset); 258 bytes_to_copy = PAGE_SIZE - offset; 259 if (bytes_to_copy > len) { 260 bytes_to_copy = len; 261 if (new) 262 memset(kaddr+offset+len, 0, 263 PAGE_SIZE-offset-len); 264 } 265 err = copy_from_user(kaddr+offset, str, bytes_to_copy); 266 if (err) { 267 ret = -EFAULT; 268 goto out; 269 } 270 271 pos += bytes_to_copy; 272 str += bytes_to_copy; 273 len -= bytes_to_copy; 274 } 275 } 276 ret = 0; 277out: 278 if (kmapped_page) 279 kunmap(kmapped_page); 280 return ret; 281} 282 283/* 284 * Like copy_strings, but get argv and its values from kernel memory. 285 */ 286int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) 287{ 288 int r; 289 mm_segment_t oldfs = get_fs(); 290 set_fs(KERNEL_DS); 291 r = copy_strings(argc, (char __user * __user *)argv, bprm); 292 set_fs(oldfs); 293 return r; 294} 295 296EXPORT_SYMBOL(copy_strings_kernel); 297 298#ifdef CONFIG_MMU 299/* 300 * This routine is used to map in a page into an address space: needed by 301 * execve() for the initial stack and environment pages. 302 * 303 * vma->vm_mm->mmap_sem is held for writing. 304 */ 305void install_arg_page(struct vm_area_struct *vma, 306 struct page *page, unsigned long address) 307{ 308 struct mm_struct *mm = vma->vm_mm; 309 pgd_t * pgd; 310 pud_t * pud; 311 pmd_t * pmd; 312 pte_t * pte; 313 spinlock_t *ptl; 314 315 if (unlikely(anon_vma_prepare(vma))) 316 goto out; 317 318 flush_dcache_page(page); 319 pgd = pgd_offset(mm, address); 320 pud = pud_alloc(mm, pgd, address); 321 if (!pud) 322 goto out; 323 pmd = pmd_alloc(mm, pud, address); 324 if (!pmd) 325 goto out; 326 pte = pte_alloc_map_lock(mm, pmd, address, &ptl); 327 if (!pte) 328 goto out; 329 if (!pte_none(*pte)) { 330 pte_unmap_unlock(pte, ptl); 331 goto out; 332 } 333 inc_mm_counter(mm, anon_rss); 334 lru_cache_add_active(page); 335 set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( 336 page, vma->vm_page_prot)))); 337 page_add_anon_rmap(page, vma, address); 338 pte_unmap_unlock(pte, ptl); 339 340 /* no need for flush_tlb */ 341 return; 342out: 343 __free_page(page); 344 force_sig(SIGKILL, current); 345} 346 347#define EXTRA_STACK_VM_PAGES 20 /* random */ 348 349int setup_arg_pages(struct linux_binprm *bprm, 350 unsigned long stack_top, 351 int executable_stack) 352{ 353 unsigned long stack_base; 354 struct vm_area_struct *mpnt; 355 struct mm_struct *mm = current->mm; 356 int i, ret; 357 long arg_size; 358 359#ifdef CONFIG_STACK_GROWSUP 360 /* Move the argument and environment strings to the bottom of the 361 * stack space. 362 */ 363 int offset, j; 364 char *to, *from; 365 366 /* Start by shifting all the pages down */ 367 i = 0; 368 for (j = 0; j < MAX_ARG_PAGES; j++) { 369 struct page *page = bprm->page[j]; 370 if (!page) 371 continue; 372 bprm->page[i++] = page; 373 } 374 375 /* Now move them within their pages */ 376 offset = bprm->p % PAGE_SIZE; 377 to = kmap(bprm->page[0]); 378 for (j = 1; j < i; j++) { 379 memmove(to, to + offset, PAGE_SIZE - offset); 380 from = kmap(bprm->page[j]); 381 memcpy(to + PAGE_SIZE - offset, from, offset); 382 kunmap(bprm->page[j - 1]); 383 to = from; 384 } 385 memmove(to, to + offset, PAGE_SIZE - offset); 386 kunmap(bprm->page[j - 1]); 387 388 /* Limit stack size to 1GB */ 389 stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; 390 if (stack_base > (1 << 30)) 391 stack_base = 1 << 30; 392 stack_base = PAGE_ALIGN(stack_top - stack_base); 393 394 /* Adjust bprm->p to point to the end of the strings. */ 395 bprm->p = stack_base + PAGE_SIZE * i - offset; 396 397 mm->arg_start = stack_base; 398 arg_size = i << PAGE_SHIFT; 399 400 /* zero pages that were copied above */ 401 while (i < MAX_ARG_PAGES) 402 bprm->page[i++] = NULL; 403#else 404 stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE); 405 stack_base = PAGE_ALIGN(stack_base); 406 bprm->p += stack_base; 407 mm->arg_start = bprm->p; 408 arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start); 409#endif 410 411 arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE; 412 413 if (bprm->loader) 414 bprm->loader += stack_base; 415 bprm->exec += stack_base; 416 417 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 418 if (!mpnt) 419 return -ENOMEM; 420 421 memset(mpnt, 0, sizeof(*mpnt)); 422 423 down_write(&mm->mmap_sem); 424 { 425 mpnt->vm_mm = mm; 426#ifdef CONFIG_STACK_GROWSUP 427 mpnt->vm_start = stack_base; 428 mpnt->vm_end = stack_base + arg_size; 429#else 430 mpnt->vm_end = stack_top; 431 mpnt->vm_start = mpnt->vm_end - arg_size; 432#endif 433 /* Adjust stack execute permissions; explicitly enable 434 * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X 435 * and leave alone (arch default) otherwise. */ 436 if (unlikely(executable_stack == EXSTACK_ENABLE_X)) 437 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; 438 else if (executable_stack == EXSTACK_DISABLE_X) 439 mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; 440 else 441 mpnt->vm_flags = VM_STACK_FLAGS; 442 mpnt->vm_flags |= mm->def_flags; 443 mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; 444 if ((ret = insert_vm_struct(mm, mpnt))) { 445 up_write(&mm->mmap_sem); 446 kmem_cache_free(vm_area_cachep, mpnt); 447 return ret; 448 } 449 mm->stack_vm = mm->total_vm = vma_pages(mpnt); 450 } 451 452 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { 453 struct page *page = bprm->page[i]; 454 if (page) { 455 bprm->page[i] = NULL; 456 install_arg_page(mpnt, page, stack_base); 457 } 458 stack_base += PAGE_SIZE; 459 } 460 up_write(&mm->mmap_sem); 461 462 return 0; 463} 464 465EXPORT_SYMBOL(setup_arg_pages); 466 467#define free_arg_pages(bprm) do { } while (0) 468 469#else 470 471static inline void free_arg_pages(struct linux_binprm *bprm) 472{ 473 int i; 474 475 for (i = 0; i < MAX_ARG_PAGES; i++) { 476 if (bprm->page[i]) 477 __free_page(bprm->page[i]); 478 bprm->page[i] = NULL; 479 } 480} 481 482#endif /* CONFIG_MMU */ 483 484struct file *open_exec(const char *name) 485{ 486 struct nameidata nd; 487 int err; 488 struct file *file; 489 490 err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); 491 file = ERR_PTR(err); 492 493 if (!err) { 494 struct inode *inode = nd.dentry->d_inode; 495 file = ERR_PTR(-EACCES); 496 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && 497 S_ISREG(inode->i_mode)) { 498 int err = vfs_permission(&nd, MAY_EXEC); 499 if (!err && !(inode->i_mode & 0111)) 500 err = -EACCES; 501 file = ERR_PTR(err); 502 if (!err) { 503 file = nameidata_to_filp(&nd, O_RDONLY); 504 if (!IS_ERR(file)) { 505 err = deny_write_access(file); 506 if (err) { 507 fput(file); 508 file = ERR_PTR(err); 509 } 510 } 511out: 512 return file; 513 } 514 } 515 release_open_intent(&nd); 516 path_release(&nd); 517 } 518 goto out; 519} 520 521EXPORT_SYMBOL(open_exec); 522 523int kernel_read(struct file *file, unsigned long offset, 524 char *addr, unsigned long count) 525{ 526 mm_segment_t old_fs; 527 loff_t pos = offset; 528 int result; 529 530 old_fs = get_fs(); 531 set_fs(get_ds()); 532 /* The cast to a user pointer is valid due to the set_fs() */ 533 result = vfs_read(file, (void __user *)addr, count, &pos); 534 set_fs(old_fs); 535 return result; 536} 537 538EXPORT_SYMBOL(kernel_read); 539 540static int exec_mmap(struct mm_struct *mm) 541{ 542 struct task_struct *tsk; 543 struct mm_struct * old_mm, *active_mm; 544 545 /* Notify parent that we're no longer interested in the old VM */ 546 tsk = current; 547 old_mm = current->mm; 548 mm_release(tsk, old_mm); 549 550 if (old_mm) { 551 /* 552 * Make sure that if there is a core dump in progress 553 * for the old mm, we get out and die instead of going 554 * through with the exec. We must hold mmap_sem around 555 * checking core_waiters and changing tsk->mm. The 556 * core-inducing thread will increment core_waiters for 557 * each thread whose ->mm == old_mm. 558 */ 559 down_read(&old_mm->mmap_sem); 560 if (unlikely(old_mm->core_waiters)) { 561 up_read(&old_mm->mmap_sem); 562 return -EINTR; 563 } 564 } 565 task_lock(tsk); 566 active_mm = tsk->active_mm; 567 tsk->mm = mm; 568 tsk->active_mm = mm; 569 activate_mm(active_mm, mm); 570 task_unlock(tsk); 571 arch_pick_mmap_layout(mm); 572 if (old_mm) { 573 up_read(&old_mm->mmap_sem); 574 if (active_mm != old_mm) BUG(); 575 mmput(old_mm); 576 return 0; 577 } 578 mmdrop(active_mm); 579 return 0; 580} 581 582/* 583 * This function makes sure the current process has its own signal table, 584 * so that flush_signal_handlers can later reset the handlers without 585 * disturbing other processes. (Other processes might share the signal 586 * table via the CLONE_SIGHAND option to clone().) 587 */ 588static inline int de_thread(struct task_struct *tsk) 589{ 590 struct signal_struct *sig = tsk->signal; 591 struct sighand_struct *newsighand, *oldsighand = tsk->sighand; 592 spinlock_t *lock = &oldsighand->siglock; 593 struct task_struct *leader = NULL; 594 int count; 595 596 /* 597 * If we don't share sighandlers, then we aren't sharing anything 598 * and we can just re-use it all. 599 */ 600 if (atomic_read(&oldsighand->count) <= 1) { 601 BUG_ON(atomic_read(&sig->count) != 1); 602 exit_itimers(sig); 603 return 0; 604 } 605 606 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 607 if (!newsighand) 608 return -ENOMEM; 609 610 if (thread_group_empty(current)) 611 goto no_thread_group; 612 613 /* 614 * Kill all other threads in the thread group. 615 * We must hold tasklist_lock to call zap_other_threads. 616 */ 617 read_lock(&tasklist_lock); 618 spin_lock_irq(lock); 619 if (sig->flags & SIGNAL_GROUP_EXIT) { 620 /* 621 * Another group action in progress, just 622 * return so that the signal is processed. 623 */ 624 spin_unlock_irq(lock); 625 read_unlock(&tasklist_lock); 626 kmem_cache_free(sighand_cachep, newsighand); 627 return -EAGAIN; 628 } 629 zap_other_threads(current); 630 read_unlock(&tasklist_lock); 631 632 /* 633 * Account for the thread group leader hanging around: 634 */ 635 count = 1; 636 if (!thread_group_leader(current)) { 637 count = 2; 638 /* 639 * The SIGALRM timer survives the exec, but needs to point 640 * at us as the new group leader now. We have a race with 641 * a timer firing now getting the old leader, so we need to 642 * synchronize with any firing (by calling del_timer_sync) 643 * before we can safely let the old group leader die. 644 */ 645 sig->real_timer.data = (unsigned long)current; 646 spin_unlock_irq(lock); 647 if (del_timer_sync(&sig->real_timer)) 648 add_timer(&sig->real_timer); 649 spin_lock_irq(lock); 650 } 651 while (atomic_read(&sig->count) > count) { 652 sig->group_exit_task = current; 653 sig->notify_count = count; 654 __set_current_state(TASK_UNINTERRUPTIBLE); 655 spin_unlock_irq(lock); 656 schedule(); 657 spin_lock_irq(lock); 658 } 659 sig->group_exit_task = NULL; 660 sig->notify_count = 0; 661 spin_unlock_irq(lock); 662 663 /* 664 * At this point all other threads have exited, all we have to 665 * do is to wait for the thread group leader to become inactive, 666 * and to assume its PID: 667 */ 668 if (!thread_group_leader(current)) { 669 struct task_struct *parent; 670 struct dentry *proc_dentry1, *proc_dentry2; 671 unsigned long ptrace; 672 673 /* 674 * Wait for the thread group leader to be a zombie. 675 * It should already be zombie at this point, most 676 * of the time. 677 */ 678 leader = current->group_leader; 679 while (leader->exit_state != EXIT_ZOMBIE) 680 yield(); 681 682 spin_lock(&leader->proc_lock); 683 spin_lock(&current->proc_lock); 684 proc_dentry1 = proc_pid_unhash(current); 685 proc_dentry2 = proc_pid_unhash(leader); 686 write_lock_irq(&tasklist_lock); 687 688 BUG_ON(leader->tgid != current->tgid); 689 BUG_ON(current->pid == current->tgid); 690 /* 691 * An exec() starts a new thread group with the 692 * TGID of the previous thread group. Rehash the 693 * two threads with a switched PID, and release 694 * the former thread group leader: 695 */ 696 ptrace = leader->ptrace; 697 parent = leader->parent; 698 if (unlikely(ptrace) && unlikely(parent == current)) { 699 /* 700 * Joker was ptracing his own group leader, 701 * and now he wants to be his own parent! 702 * We can't have that. 703 */ 704 ptrace = 0; 705 } 706 707 ptrace_unlink(current); 708 ptrace_unlink(leader); 709 remove_parent(current); 710 remove_parent(leader); 711 712 switch_exec_pids(leader, current); 713 714 current->parent = current->real_parent = leader->real_parent; 715 leader->parent = leader->real_parent = child_reaper; 716 current->group_leader = current; 717 leader->group_leader = leader; 718 719 add_parent(current, current->parent); 720 add_parent(leader, leader->parent); 721 if (ptrace) { 722 current->ptrace = ptrace; 723 __ptrace_link(current, parent); 724 } 725 726 list_del(&current->tasks); 727 list_add_tail(&current->tasks, &init_task.tasks); 728 current->exit_signal = SIGCHLD; 729 730 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 731 leader->exit_state = EXIT_DEAD; 732 733 write_unlock_irq(&tasklist_lock); 734 spin_unlock(&leader->proc_lock); 735 spin_unlock(&current->proc_lock); 736 proc_pid_flush(proc_dentry1); 737 proc_pid_flush(proc_dentry2); 738 } 739 740 /* 741 * There may be one thread left which is just exiting, 742 * but it's safe to stop telling the group to kill themselves. 743 */ 744 sig->flags = 0; 745 746no_thread_group: 747 exit_itimers(sig); 748 if (leader) 749 release_task(leader); 750 751 BUG_ON(atomic_read(&sig->count) != 1); 752 753 if (atomic_read(&oldsighand->count) == 1) { 754 /* 755 * Now that we nuked the rest of the thread group, 756 * it turns out we are not sharing sighand any more either. 757 * So we can just keep it. 758 */ 759 kmem_cache_free(sighand_cachep, newsighand); 760 } else { 761 /* 762 * Move our state over to newsighand and switch it in. 763 */ 764 spin_lock_init(&newsighand->siglock); 765 atomic_set(&newsighand->count, 1); 766 memcpy(newsighand->action, oldsighand->action, 767 sizeof(newsighand->action)); 768 769 write_lock_irq(&tasklist_lock); 770 spin_lock(&oldsighand->siglock); 771 spin_lock(&newsighand->siglock); 772 773 current->sighand = newsighand; 774 recalc_sigpending(); 775 776 spin_unlock(&newsighand->siglock); 777 spin_unlock(&oldsighand->siglock); 778 write_unlock_irq(&tasklist_lock); 779 780 if (atomic_dec_and_test(&oldsighand->count)) 781 kmem_cache_free(sighand_cachep, oldsighand); 782 } 783 784 BUG_ON(!thread_group_leader(current)); 785 return 0; 786} 787 788/* 789 * These functions flushes out all traces of the currently running executable 790 * so that a new one can be started 791 */ 792 793static inline void flush_old_files(struct files_struct * files) 794{ 795 long j = -1; 796 struct fdtable *fdt; 797 798 spin_lock(&files->file_lock); 799 for (;;) { 800 unsigned long set, i; 801 802 j++; 803 i = j * __NFDBITS; 804 fdt = files_fdtable(files); 805 if (i >= fdt->max_fds || i >= fdt->max_fdset) 806 break; 807 set = fdt->close_on_exec->fds_bits[j]; 808 if (!set) 809 continue; 810 fdt->close_on_exec->fds_bits[j] = 0; 811 spin_unlock(&files->file_lock); 812 for ( ; set ; i++,set >>= 1) { 813 if (set & 1) { 814 sys_close(i); 815 } 816 } 817 spin_lock(&files->file_lock); 818 819 } 820 spin_unlock(&files->file_lock); 821} 822 823void get_task_comm(char *buf, struct task_struct *tsk) 824{ 825 /* buf must be at least sizeof(tsk->comm) in size */ 826 task_lock(tsk); 827 strncpy(buf, tsk->comm, sizeof(tsk->comm)); 828 task_unlock(tsk); 829} 830 831void set_task_comm(struct task_struct *tsk, char *buf) 832{ 833 task_lock(tsk); 834 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 835 task_unlock(tsk); 836} 837 838int flush_old_exec(struct linux_binprm * bprm) 839{ 840 char * name; 841 int i, ch, retval; 842 struct files_struct *files; 843 char tcomm[sizeof(current->comm)]; 844 845 /* 846 * Make sure we have a private signal table and that 847 * we are unassociated from the previous thread group. 848 */ 849 retval = de_thread(current); 850 if (retval) 851 goto out; 852 853 /* 854 * Make sure we have private file handles. Ask the 855 * fork helper to do the work for us and the exit 856 * helper to do the cleanup of the old one. 857 */ 858 files = current->files; /* refcounted so safe to hold */ 859 retval = unshare_files(); 860 if (retval) 861 goto out; 862 /* 863 * Release all of the old mmap stuff 864 */ 865 retval = exec_mmap(bprm->mm); 866 if (retval) 867 goto mmap_failed; 868 869 bprm->mm = NULL; /* We're using it now */ 870 871 /* This is the point of no return */ 872 steal_locks(files); 873 put_files_struct(files); 874 875 current->sas_ss_sp = current->sas_ss_size = 0; 876 877 if (current->euid == current->uid && current->egid == current->gid) 878 current->mm->dumpable = 1; 879 else 880 current->mm->dumpable = suid_dumpable; 881 882 name = bprm->filename; 883 884 /* Copies the binary name from after last slash */ 885 for (i=0; (ch = *(name++)) != '\0';) { 886 if (ch == '/') 887 i = 0; /* overwrite what we wrote */ 888 else 889 if (i < (sizeof(tcomm) - 1)) 890 tcomm[i++] = ch; 891 } 892 tcomm[i] = '\0'; 893 set_task_comm(current, tcomm); 894 895 current->flags &= ~PF_RANDOMIZE; 896 flush_thread(); 897 898 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 899 file_permission(bprm->file, MAY_READ) || 900 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { 901 suid_keys(current); 902 current->mm->dumpable = suid_dumpable; 903 } 904 905 /* An exec changes our domain. We are no longer part of the thread 906 group */ 907 908 current->self_exec_id++; 909 910 flush_signal_handlers(current, 0); 911 flush_old_files(current->files); 912 913 return 0; 914 915mmap_failed: 916 put_files_struct(current->files); 917 current->files = files; 918out: 919 return retval; 920} 921 922EXPORT_SYMBOL(flush_old_exec); 923 924/* 925 * Fill the binprm structure from the inode. 926 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 927 */ 928int prepare_binprm(struct linux_binprm *bprm) 929{ 930 int mode; 931 struct inode * inode = bprm->file->f_dentry->d_inode; 932 int retval; 933 934 mode = inode->i_mode; 935 /* 936 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, 937 * generic_permission lets a non-executable through 938 */ 939 if (!(mode & 0111)) /* with at least _one_ execute bit set */ 940 return -EACCES; 941 if (bprm->file->f_op == NULL) 942 return -EACCES; 943 944 bprm->e_uid = current->euid; 945 bprm->e_gid = current->egid; 946 947 if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) { 948 /* Set-uid? */ 949 if (mode & S_ISUID) { 950 current->personality &= ~PER_CLEAR_ON_SETID; 951 bprm->e_uid = inode->i_uid; 952 } 953 954 /* Set-gid? */ 955 /* 956 * If setgid is set but no group execute bit then this 957 * is a candidate for mandatory locking, not a setgid 958 * executable. 959 */ 960 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 961 current->personality &= ~PER_CLEAR_ON_SETID; 962 bprm->e_gid = inode->i_gid; 963 } 964 } 965 966 /* fill in binprm security blob */ 967 retval = security_bprm_set(bprm); 968 if (retval) 969 return retval; 970 971 memset(bprm->buf,0,BINPRM_BUF_SIZE); 972 return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE); 973} 974 975EXPORT_SYMBOL(prepare_binprm); 976 977static inline int unsafe_exec(struct task_struct *p) 978{ 979 int unsafe = 0; 980 if (p->ptrace & PT_PTRACED) { 981 if (p->ptrace & PT_PTRACE_CAP) 982 unsafe |= LSM_UNSAFE_PTRACE_CAP; 983 else 984 unsafe |= LSM_UNSAFE_PTRACE; 985 } 986 if (atomic_read(&p->fs->count) > 1 || 987 atomic_read(&p->files->count) > 1 || 988 atomic_read(&p->sighand->count) > 1) 989 unsafe |= LSM_UNSAFE_SHARE; 990 991 return unsafe; 992} 993 994void compute_creds(struct linux_binprm *bprm) 995{ 996 int unsafe; 997 998 if (bprm->e_uid != current->uid) 999 suid_keys(current); 1000 exec_keys(current); 1001 1002 task_lock(current); 1003 unsafe = unsafe_exec(current); 1004 security_bprm_apply_creds(bprm, unsafe); 1005 task_unlock(current); 1006 security_bprm_post_apply_creds(bprm); 1007} 1008 1009EXPORT_SYMBOL(compute_creds); 1010 1011void remove_arg_zero(struct linux_binprm *bprm) 1012{ 1013 if (bprm->argc) { 1014 unsigned long offset; 1015 char * kaddr; 1016 struct page *page; 1017 1018 offset = bprm->p % PAGE_SIZE; 1019 goto inside; 1020 1021 while (bprm->p++, *(kaddr+offset++)) { 1022 if (offset != PAGE_SIZE) 1023 continue; 1024 offset = 0; 1025 kunmap_atomic(kaddr, KM_USER0); 1026inside: 1027 page = bprm->page[bprm->p/PAGE_SIZE]; 1028 kaddr = kmap_atomic(page, KM_USER0); 1029 } 1030 kunmap_atomic(kaddr, KM_USER0); 1031 bprm->argc--; 1032 } 1033} 1034 1035EXPORT_SYMBOL(remove_arg_zero); 1036 1037/* 1038 * cycle the list of binary formats handler, until one recognizes the image 1039 */ 1040int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) 1041{ 1042 int try,retval; 1043 struct linux_binfmt *fmt; 1044#ifdef __alpha__ 1045 /* handle /sbin/loader.. */ 1046 { 1047 struct exec * eh = (struct exec *) bprm->buf; 1048 1049 if (!bprm->loader && eh->fh.f_magic == 0x183 && 1050 (eh->fh.f_flags & 0x3000) == 0x3000) 1051 { 1052 struct file * file; 1053 unsigned long loader; 1054 1055 allow_write_access(bprm->file); 1056 fput(bprm->file); 1057 bprm->file = NULL; 1058 1059 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); 1060 1061 file = open_exec("/sbin/loader"); 1062 retval = PTR_ERR(file); 1063 if (IS_ERR(file)) 1064 return retval; 1065 1066 /* Remember if the application is TASO. */ 1067 bprm->sh_bang = eh->ah.entry < 0x100000000UL; 1068 1069 bprm->file = file; 1070 bprm->loader = loader; 1071 retval = prepare_binprm(bprm); 1072 if (retval<0) 1073 return retval; 1074 /* should call search_binary_handler recursively here, 1075 but it does not matter */ 1076 } 1077 } 1078#endif 1079 retval = security_bprm_check(bprm); 1080 if (retval) 1081 return retval; 1082 1083 /* kernel module loader fixup */ 1084 /* so we don't try to load run modprobe in kernel space. */ 1085 set_fs(USER_DS); 1086 retval = -ENOENT; 1087 for (try=0; try<2; try++) { 1088 read_lock(&binfmt_lock); 1089 for (fmt = formats ; fmt ; fmt = fmt->next) { 1090 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; 1091 if (!fn) 1092 continue; 1093 if (!try_module_get(fmt->module)) 1094 continue; 1095 read_unlock(&binfmt_lock); 1096 retval = fn(bprm, regs); 1097 if (retval >= 0) { 1098 put_binfmt(fmt); 1099 allow_write_access(bprm->file); 1100 if (bprm->file) 1101 fput(bprm->file); 1102 bprm->file = NULL; 1103 current->did_exec = 1; 1104 proc_exec_connector(current); 1105 return retval; 1106 } 1107 read_lock(&binfmt_lock); 1108 put_binfmt(fmt); 1109 if (retval != -ENOEXEC || bprm->mm == NULL) 1110 break; 1111 if (!bprm->file) { 1112 read_unlock(&binfmt_lock); 1113 return retval; 1114 } 1115 } 1116 read_unlock(&binfmt_lock); 1117 if (retval != -ENOEXEC || bprm->mm == NULL) { 1118 break; 1119#ifdef CONFIG_KMOD 1120 }else{ 1121#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) 1122 if (printable(bprm->buf[0]) && 1123 printable(bprm->buf[1]) && 1124 printable(bprm->buf[2]) && 1125 printable(bprm->buf[3])) 1126 break; /* -ENOEXEC */ 1127 request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); 1128#endif 1129 } 1130 } 1131 return retval; 1132} 1133 1134EXPORT_SYMBOL(search_binary_handler); 1135 1136/* 1137 * sys_execve() executes a new program. 1138 */ 1139int do_execve(char * filename, 1140 char __user *__user *argv, 1141 char __user *__user *envp, 1142 struct pt_regs * regs) 1143{ 1144 struct linux_binprm *bprm; 1145 struct file *file; 1146 int retval; 1147 int i; 1148 1149 retval = -ENOMEM; 1150 bprm = kmalloc(sizeof(*bprm), GFP_KERNEL); 1151 if (!bprm) 1152 goto out_ret; 1153 memset(bprm, 0, sizeof(*bprm)); 1154 1155 file = open_exec(filename); 1156 retval = PTR_ERR(file); 1157 if (IS_ERR(file)) 1158 goto out_kfree; 1159 1160 sched_exec(); 1161 1162 bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); 1163 1164 bprm->file = file; 1165 bprm->filename = filename; 1166 bprm->interp = filename; 1167 bprm->mm = mm_alloc(); 1168 retval = -ENOMEM; 1169 if (!bprm->mm) 1170 goto out_file; 1171 1172 retval = init_new_context(current, bprm->mm); 1173 if (retval < 0) 1174 goto out_mm; 1175 1176 bprm->argc = count(argv, bprm->p / sizeof(void *)); 1177 if ((retval = bprm->argc) < 0) 1178 goto out_mm; 1179 1180 bprm->envc = count(envp, bprm->p / sizeof(void *)); 1181 if ((retval = bprm->envc) < 0) 1182 goto out_mm; 1183 1184 retval = security_bprm_alloc(bprm); 1185 if (retval) 1186 goto out; 1187 1188 retval = prepare_binprm(bprm); 1189 if (retval < 0) 1190 goto out; 1191 1192 retval = copy_strings_kernel(1, &bprm->filename, bprm); 1193 if (retval < 0) 1194 goto out; 1195 1196 bprm->exec = bprm->p; 1197 retval = copy_strings(bprm->envc, envp, bprm); 1198 if (retval < 0) 1199 goto out; 1200 1201 retval = copy_strings(bprm->argc, argv, bprm); 1202 if (retval < 0) 1203 goto out; 1204 1205 retval = search_binary_handler(bprm,regs); 1206 if (retval >= 0) { 1207 free_arg_pages(bprm); 1208 1209 /* execve success */ 1210 security_bprm_free(bprm); 1211 acct_update_integrals(current); 1212 kfree(bprm); 1213 return retval; 1214 } 1215 1216out: 1217 /* Something went wrong, return the inode and free the argument pages*/ 1218 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { 1219 struct page * page = bprm->page[i]; 1220 if (page) 1221 __free_page(page); 1222 } 1223 1224 if (bprm->security) 1225 security_bprm_free(bprm); 1226 1227out_mm: 1228 if (bprm->mm) 1229 mmdrop(bprm->mm); 1230 1231out_file: 1232 if (bprm->file) { 1233 allow_write_access(bprm->file); 1234 fput(bprm->file); 1235 } 1236 1237out_kfree: 1238 kfree(bprm); 1239 1240out_ret: 1241 return retval; 1242} 1243 1244int set_binfmt(struct linux_binfmt *new) 1245{ 1246 struct linux_binfmt *old = current->binfmt; 1247 1248 if (new) { 1249 if (!try_module_get(new->module)) 1250 return -1; 1251 } 1252 current->binfmt = new; 1253 if (old) 1254 module_put(old->module); 1255 return 0; 1256} 1257 1258EXPORT_SYMBOL(set_binfmt); 1259 1260#define CORENAME_MAX_SIZE 64 1261 1262/* format_corename will inspect the pattern parameter, and output a 1263 * name into corename, which must have space for at least 1264 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1265 */ 1266static void format_corename(char *corename, const char *pattern, long signr) 1267{ 1268 const char *pat_ptr = pattern; 1269 char *out_ptr = corename; 1270 char *const out_end = corename + CORENAME_MAX_SIZE; 1271 int rc; 1272 int pid_in_pattern = 0; 1273 1274 /* Repeat as long as we have more pattern to process and more output 1275 space */ 1276 while (*pat_ptr) { 1277 if (*pat_ptr != '%') { 1278 if (out_ptr == out_end) 1279 goto out; 1280 *out_ptr++ = *pat_ptr++; 1281 } else { 1282 switch (*++pat_ptr) { 1283 case 0: 1284 goto out; 1285 /* Double percent, output one percent */ 1286 case '%': 1287 if (out_ptr == out_end) 1288 goto out; 1289 *out_ptr++ = '%'; 1290 break; 1291 /* pid */ 1292 case 'p': 1293 pid_in_pattern = 1; 1294 rc = snprintf(out_ptr, out_end - out_ptr, 1295 "%d", current->tgid); 1296 if (rc > out_end - out_ptr) 1297 goto out; 1298 out_ptr += rc; 1299 break; 1300 /* uid */ 1301 case 'u': 1302 rc = snprintf(out_ptr, out_end - out_ptr, 1303 "%d", current->uid); 1304 if (rc > out_end - out_ptr) 1305 goto out; 1306 out_ptr += rc; 1307 break; 1308 /* gid */ 1309 case 'g': 1310 rc = snprintf(out_ptr, out_end - out_ptr, 1311 "%d", current->gid); 1312 if (rc > out_end - out_ptr) 1313 goto out; 1314 out_ptr += rc; 1315 break; 1316 /* signal that caused the coredump */ 1317 case 's': 1318 rc = snprintf(out_ptr, out_end - out_ptr, 1319 "%ld", signr); 1320 if (rc > out_end - out_ptr) 1321 goto out; 1322 out_ptr += rc; 1323 break; 1324 /* UNIX time of coredump */ 1325 case 't': { 1326 struct timeval tv; 1327 do_gettimeofday(&tv); 1328 rc = snprintf(out_ptr, out_end - out_ptr, 1329 "%lu", tv.tv_sec); 1330 if (rc > out_end - out_ptr) 1331 goto out; 1332 out_ptr += rc; 1333 break; 1334 } 1335 /* hostname */ 1336 case 'h': 1337 down_read(&uts_sem); 1338 rc = snprintf(out_ptr, out_end - out_ptr, 1339 "%s", system_utsname.nodename); 1340 up_read(&uts_sem); 1341 if (rc > out_end - out_ptr) 1342 goto out; 1343 out_ptr += rc; 1344 break; 1345 /* executable */ 1346 case 'e': 1347 rc = snprintf(out_ptr, out_end - out_ptr, 1348 "%s", current->comm); 1349 if (rc > out_end - out_ptr) 1350 goto out; 1351 out_ptr += rc; 1352 break; 1353 default: 1354 break; 1355 } 1356 ++pat_ptr; 1357 } 1358 } 1359 /* Backward compatibility with core_uses_pid: 1360 * 1361 * If core_pattern does not include a %p (as is the default) 1362 * and core_uses_pid is set, then .%pid will be appended to 1363 * the filename */ 1364 if (!pid_in_pattern 1365 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1366 rc = snprintf(out_ptr, out_end - out_ptr, 1367 ".%d", current->tgid); 1368 if (rc > out_end - out_ptr) 1369 goto out; 1370 out_ptr += rc; 1371 } 1372 out: 1373 *out_ptr = 0; 1374} 1375 1376static void zap_threads (struct mm_struct *mm) 1377{ 1378 struct task_struct *g, *p; 1379 struct task_struct *tsk = current; 1380 struct completion *vfork_done = tsk->vfork_done; 1381 int traced = 0; 1382 1383 /* 1384 * Make sure nobody is waiting for us to release the VM, 1385 * otherwise we can deadlock when we wait on each other 1386 */ 1387 if (vfork_done) { 1388 tsk->vfork_done = NULL; 1389 complete(vfork_done); 1390 } 1391 1392 read_lock(&tasklist_lock); 1393 do_each_thread(g,p) 1394 if (mm == p->mm && p != tsk) { 1395 force_sig_specific(SIGKILL, p); 1396 mm->core_waiters++; 1397 if (unlikely(p->ptrace) && 1398 unlikely(p->parent->mm == mm)) 1399 traced = 1; 1400 } 1401 while_each_thread(g,p); 1402 1403 read_unlock(&tasklist_lock); 1404 1405 if (unlikely(traced)) { 1406 /* 1407 * We are zapping a thread and the thread it ptraces. 1408 * If the tracee went into a ptrace stop for exit tracing, 1409 * we could deadlock since the tracer is waiting for this 1410 * coredump to finish. Detach them so they can both die. 1411 */ 1412 write_lock_irq(&tasklist_lock); 1413 do_each_thread(g,p) { 1414 if (mm == p->mm && p != tsk && 1415 p->ptrace && p->parent->mm == mm) { 1416 __ptrace_unlink(p); 1417 } 1418 } while_each_thread(g,p); 1419 write_unlock_irq(&tasklist_lock); 1420 } 1421} 1422 1423static void coredump_wait(struct mm_struct *mm) 1424{ 1425 DECLARE_COMPLETION(startup_done); 1426 int core_waiters; 1427 1428 mm->core_startup_done = &startup_done; 1429 1430 zap_threads(mm); 1431 core_waiters = mm->core_waiters; 1432 up_write(&mm->mmap_sem); 1433 1434 if (core_waiters) 1435 wait_for_completion(&startup_done); 1436 BUG_ON(mm->core_waiters); 1437} 1438 1439int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1440{ 1441 char corename[CORENAME_MAX_SIZE + 1]; 1442 struct mm_struct *mm = current->mm; 1443 struct linux_binfmt * binfmt; 1444 struct inode * inode; 1445 struct file * file; 1446 int retval = 0; 1447 int fsuid = current->fsuid; 1448 int flag = 0; 1449 1450 binfmt = current->binfmt; 1451 if (!binfmt || !binfmt->core_dump) 1452 goto fail; 1453 down_write(&mm->mmap_sem); 1454 if (!mm->dumpable) { 1455 up_write(&mm->mmap_sem); 1456 goto fail; 1457 } 1458 1459 /* 1460 * We cannot trust fsuid as being the "true" uid of the 1461 * process nor do we know its entire history. We only know it 1462 * was tainted so we dump it as root in mode 2. 1463 */ 1464 if (mm->dumpable == 2) { /* Setuid core dump mode */ 1465 flag = O_EXCL; /* Stop rewrite attacks */ 1466 current->fsuid = 0; /* Dump root private */ 1467 } 1468 mm->dumpable = 0; 1469 1470 retval = -EAGAIN; 1471 spin_lock_irq(&current->sighand->siglock); 1472 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { 1473 current->signal->flags = SIGNAL_GROUP_EXIT; 1474 current->signal->group_exit_code = exit_code; 1475 retval = 0; 1476 } 1477 spin_unlock_irq(&current->sighand->siglock); 1478 if (retval) { 1479 up_write(&mm->mmap_sem); 1480 goto fail; 1481 } 1482 1483 init_completion(&mm->core_done); 1484 coredump_wait(mm); 1485 1486 /* 1487 * Clear any false indication of pending signals that might 1488 * be seen by the filesystem code called to write the core file. 1489 */ 1490 current->signal->group_stop_count = 0; 1491 clear_thread_flag(TIF_SIGPENDING); 1492 1493 if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) 1494 goto fail_unlock; 1495 1496 /* 1497 * lock_kernel() because format_corename() is controlled by sysctl, which 1498 * uses lock_kernel() 1499 */ 1500 lock_kernel(); 1501 format_corename(corename, core_pattern, signr); 1502 unlock_kernel(); 1503 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); 1504 if (IS_ERR(file)) 1505 goto fail_unlock; 1506 inode = file->f_dentry->d_inode; 1507 if (inode->i_nlink > 1) 1508 goto close_fail; /* multiple links - don't dump */ 1509 if (d_unhashed(file->f_dentry)) 1510 goto close_fail; 1511 1512 if (!S_ISREG(inode->i_mode)) 1513 goto close_fail; 1514 if (!file->f_op) 1515 goto close_fail; 1516 if (!file->f_op->write) 1517 goto close_fail; 1518 if (do_truncate(file->f_dentry, 0, file) != 0) 1519 goto close_fail; 1520 1521 retval = binfmt->core_dump(signr, regs, file); 1522 1523 if (retval) 1524 current->signal->group_exit_code |= 0x80; 1525close_fail: 1526 filp_close(file, NULL); 1527fail_unlock: 1528 current->fsuid = fsuid; 1529 complete_all(&mm->core_done); 1530fail: 1531 return retval; 1532}