Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'execve-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull execve updates from Kees Cook:

- Provide knob to change (previously fixed) coredump NOTES size
(Allen Pais)

- Add sched_prepare_exec tracepoint (Marco Elver)

- Make /proc/$pid/auxv work under binfmt_elf_fdpic (Max Filippov)

- Convert ARCH_HAVE_EXTRA_ELF_NOTES to proper Kconfig (Vignesh
Balasubramanian)

- Leave a gap between .bss and brk

* tag 'execve-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
fs/coredump: Enable dynamic configuration of max file note size
binfmt_elf_fdpic: fix /proc/<pid>/auxv
binfmt_elf: Leave a gap between .bss and brk
Replace macro "ARCH_HAVE_EXTRA_ELF_NOTES" with kconfig
tracing: Add sched_prepare_exec tracepoint

+121 -52
+9
arch/Kconfig
··· 510 510 config ARCH_HAVE_NMI_SAFE_CMPXCHG 511 511 bool 512 512 513 + config ARCH_HAVE_EXTRA_ELF_NOTES 514 + bool 515 + help 516 + An architecture should select this in order to enable adding an 517 + arch-specific ELF note section to core files. It must provide two 518 + functions: elf_coredump_extra_notes_size() and 519 + elf_coredump_extra_notes_write() which are invoked by the ELF core 520 + dumper. 521 + 513 522 config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS 514 523 bool 515 524
+1
arch/powerpc/Kconfig
··· 156 156 select ARCH_HAS_UACCESS_FLUSHCACHE 157 157 select ARCH_HAS_UBSAN 158 158 select ARCH_HAVE_NMI_SAFE_CMPXCHG 159 + select ARCH_HAVE_EXTRA_ELF_NOTES if SPU_BASE 159 160 select ARCH_KEEP_MEMBLOCK 160 161 select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU 161 162 select ARCH_MIGHT_HAVE_PC_PARPORT
-2
arch/powerpc/include/asm/elf.h
··· 127 127 /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ 128 128 #define NT_SPU 1 129 129 130 - #define ARCH_HAVE_EXTRA_ELF_NOTES 131 - 132 130 #endif /* CONFIG_SPU_BASE */ 133 131 134 132 #ifdef CONFIG_PPC64
+8 -2
fs/binfmt_elf.c
··· 1262 1262 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && 1263 1263 elf_ex->e_type == ET_DYN && !interpreter) { 1264 1264 mm->brk = mm->start_brk = ELF_ET_DYN_BASE; 1265 + } else { 1266 + /* Otherwise leave a gap between .bss and brk. */ 1267 + mm->brk = mm->start_brk = mm->brk + PAGE_SIZE; 1265 1268 } 1266 1269 1267 1270 mm->brk = mm->start_brk = arch_randomize_brk(mm); ··· 1567 1564 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); 1568 1565 } 1569 1566 1570 - #define MAX_FILE_NOTE_SIZE (4*1024*1024) 1571 1567 /* 1572 1568 * Format of NT_FILE note: 1573 1569 * ··· 1594 1592 1595 1593 names_ofs = (2 + 3 * count) * sizeof(data[0]); 1596 1594 alloc: 1597 - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ 1595 + /* paranoia check */ 1596 + if (size >= core_file_note_size_limit) { 1597 + pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n", 1598 + size); 1598 1599 return -EINVAL; 1600 + } 1599 1601 size = round_up(size, PAGE_SIZE); 1600 1602 /* 1601 1603 * "size" can be 0 here legitimately.
+40 -47
fs/binfmt_elf_fdpic.c
··· 505 505 char *k_platform, *k_base_platform; 506 506 char __user *u_platform, *u_base_platform, *p; 507 507 int loop; 508 - int nr; /* reset for each csp adjustment */ 509 508 unsigned long flags = 0; 509 + int ei_index; 510 + elf_addr_t *elf_info; 510 511 511 512 #ifdef CONFIG_MMU 512 513 /* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions ··· 602 601 csp -= sp & 15UL; 603 602 sp -= sp & 15UL; 604 603 605 - /* put the ELF interpreter info on the stack */ 606 - #define NEW_AUX_ENT(id, val) \ 607 - do { \ 608 - struct { unsigned long _id, _val; } __user *ent, v; \ 609 - \ 610 - ent = (void __user *) csp; \ 611 - v._id = (id); \ 612 - v._val = (val); \ 613 - if (copy_to_user(ent + nr, &v, sizeof(v))) \ 614 - return -EFAULT; \ 615 - nr++; \ 604 + /* Create the ELF interpreter info */ 605 + elf_info = (elf_addr_t *)mm->saved_auxv; 606 + /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 607 + #define NEW_AUX_ENT(id, val) \ 608 + do { \ 609 + *elf_info++ = id; \ 610 + *elf_info++ = val; \ 616 611 } while (0) 617 612 618 - nr = 0; 619 - csp -= 2 * sizeof(unsigned long); 620 - NEW_AUX_ENT(AT_NULL, 0); 621 - if (k_platform) { 622 - nr = 0; 623 - csp -= 2 * sizeof(unsigned long); 624 - NEW_AUX_ENT(AT_PLATFORM, 625 - (elf_addr_t) (unsigned long) u_platform); 626 - } 627 - 628 - if (k_base_platform) { 629 - nr = 0; 630 - csp -= 2 * sizeof(unsigned long); 631 - NEW_AUX_ENT(AT_BASE_PLATFORM, 632 - (elf_addr_t) (unsigned long) u_base_platform); 633 - } 634 - 635 - if (bprm->have_execfd) { 636 - nr = 0; 637 - csp -= 2 * sizeof(unsigned long); 638 - NEW_AUX_ENT(AT_EXECFD, bprm->execfd); 639 - } 640 - 641 - nr = 0; 642 - csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); 613 + #ifdef ARCH_DLINFO 614 + /* 615 + * ARCH_DLINFO must come first so PPC can do its special alignment of 616 + * AUXV. 617 + * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in 618 + * ARCH_DLINFO changes 619 + */ 620 + ARCH_DLINFO; 621 + #endif 643 622 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); 644 623 #ifdef ELF_HWCAP2 645 624 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); ··· 640 659 NEW_AUX_ENT(AT_EGID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->egid)); 641 660 NEW_AUX_ENT(AT_SECURE, bprm->secureexec); 642 661 NEW_AUX_ENT(AT_EXECFN, bprm->exec); 643 - 644 - #ifdef ARCH_DLINFO 645 - nr = 0; 646 - csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long); 647 - 648 - /* ARCH_DLINFO must come last so platform specific code can enforce 649 - * special alignment requirements on the AUXV if necessary (eg. PPC). 650 - */ 651 - ARCH_DLINFO; 652 - #endif 662 + if (k_platform) 663 + NEW_AUX_ENT(AT_PLATFORM, 664 + (elf_addr_t)(unsigned long)u_platform); 665 + if (k_base_platform) 666 + NEW_AUX_ENT(AT_BASE_PLATFORM, 667 + (elf_addr_t)(unsigned long)u_base_platform); 668 + if (bprm->have_execfd) 669 + NEW_AUX_ENT(AT_EXECFD, bprm->execfd); 653 670 #undef NEW_AUX_ENT 671 + /* AT_NULL is zero; clear the rest too */ 672 + memset(elf_info, 0, (char *)mm->saved_auxv + 673 + sizeof(mm->saved_auxv) - (char *)elf_info); 674 + 675 + /* And advance past the AT_NULL entry. */ 676 + elf_info += 2; 677 + 678 + ei_index = elf_info - (elf_addr_t *)mm->saved_auxv; 679 + csp -= ei_index * sizeof(elf_addr_t); 680 + 681 + /* Put the elf_info on the stack in the right place. */ 682 + if (copy_to_user((void __user *)csp, mm->saved_auxv, 683 + ei_index * sizeof(elf_addr_t))) 684 + return -EFAULT; 654 685 655 686 /* allocate room for argv[] and envv[] */ 656 687 csp -= (bprm->envc + 1) * sizeof(elf_caddr_t);
+17
fs/coredump.c
··· 56 56 static bool dump_vma_snapshot(struct coredump_params *cprm); 57 57 static void free_vma_snapshot(struct coredump_params *cprm); 58 58 59 + #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) 60 + /* Define a reasonable max cap */ 61 + #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) 62 + 59 63 static int core_uses_pid; 60 64 static unsigned int core_pipe_limit; 61 65 static char core_pattern[CORENAME_MAX_SIZE] = "core"; 62 66 static int core_name_size = CORENAME_MAX_SIZE; 67 + unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; 63 68 64 69 struct core_name { 65 70 char *corename; ··· 1003 998 return error; 1004 999 } 1005 1000 1001 + static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; 1002 + static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; 1003 + 1006 1004 static struct ctl_table coredump_sysctls[] = { 1007 1005 { 1008 1006 .procname = "core_uses_pid", ··· 1027 1019 .maxlen = sizeof(unsigned int), 1028 1020 .mode = 0644, 1029 1021 .proc_handler = proc_dointvec, 1022 + }, 1023 + { 1024 + .procname = "core_file_note_size_limit", 1025 + .data = &core_file_note_size_limit, 1026 + .maxlen = sizeof(unsigned int), 1027 + .mode = 0644, 1028 + .proc_handler = proc_douintvec_minmax, 1029 + .extra1 = (unsigned int *)&core_file_note_size_min, 1030 + .extra2 = (unsigned int *)&core_file_note_size_max, 1030 1031 }, 1031 1032 }; 1032 1033
+8
fs/exec.c
··· 1268 1268 return retval; 1269 1269 1270 1270 /* 1271 + * This tracepoint marks the point before flushing the old exec where 1272 + * the current task is still unchanged, but errors are fatal (point of 1273 + * no return). The later "sched_process_exec" tracepoint is called after 1274 + * the current task has successfully switched to the new exec. 1275 + */ 1276 + trace_sched_prepare_exec(current, bprm); 1277 + 1278 + /* 1271 1279 * Ensure all future errors are fatal. 1272 1280 */ 1273 1281 bprm->point_of_no_return = true;
+2
include/linux/coredump.h
··· 30 30 struct core_vma_metadata *vma_meta; 31 31 }; 32 32 33 + extern unsigned int core_file_note_size_limit; 34 + 33 35 /* 34 36 * These are the only things you should do on a core-file: use only these 35 37 * functions to write out all the necessary info.
+1 -1
include/linux/elf.h
··· 65 65 struct file; 66 66 struct coredump_params; 67 67 68 - #ifndef ARCH_HAVE_EXTRA_ELF_NOTES 68 + #ifndef CONFIG_ARCH_HAVE_EXTRA_ELF_NOTES 69 69 static inline int elf_coredump_extra_notes_size(void) { return 0; } 70 70 static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; } 71 71 #else
+35
include/trace/events/sched.h
··· 420 420 __entry->pid, __entry->old_pid) 421 421 ); 422 422 423 + /** 424 + * sched_prepare_exec - called before setting up new exec 425 + * @task: pointer to the current task 426 + * @bprm: pointer to linux_binprm used for new exec 427 + * 428 + * Called before flushing the old exec, where @task is still unchanged, but at 429 + * the point of no return during switching to the new exec. At the point it is 430 + * called the exec will either succeed, or on failure terminate the task. Also 431 + * see the "sched_process_exec" tracepoint, which is called right after @task 432 + * has successfully switched to the new exec. 433 + */ 434 + TRACE_EVENT(sched_prepare_exec, 435 + 436 + TP_PROTO(struct task_struct *task, struct linux_binprm *bprm), 437 + 438 + TP_ARGS(task, bprm), 439 + 440 + TP_STRUCT__entry( 441 + __string( interp, bprm->interp ) 442 + __string( filename, bprm->filename ) 443 + __field( pid_t, pid ) 444 + __string( comm, task->comm ) 445 + ), 446 + 447 + TP_fast_assign( 448 + __assign_str(interp, bprm->interp); 449 + __assign_str(filename, bprm->filename); 450 + __entry->pid = task->pid; 451 + __assign_str(comm, task->comm); 452 + ), 453 + 454 + TP_printk("interp=%s filename=%s pid=%d comm=%s", 455 + __get_str(interp), __get_str(filename), 456 + __entry->pid, __get_str(comm)) 457 + ); 423 458 424 459 #ifdef CONFIG_SCHEDSTATS 425 460 #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT