Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

exec: separate MM_ANONPAGES and RLIMIT_STACK accounting

get_arg_page() checks bprm->rlim_stack.rlim_cur and re-calculates the
"extra" size for argv/envp pointers every time. This is a bit ugly and
not even strictly correct: acct_arg_size() must not account for this size.

Remove all the rlimit code in get_arg_page(). Instead, add bprm->argmin,
calculated once at the start of __do_execve_file(), and change
copy_strings() to check bprm->p >= bprm->argmin.

The patch adds a new helper, prepare_arg_pages(), which initializes
bprm->argc/envc and bprm->argmin.

[oleg@redhat.com: fix !CONFIG_MMU version of get_arg_page()]
Link: http://lkml.kernel.org/r/20181126122307.GA1660@redhat.com
[akpm@linux-foundation.org: use max_t]
Link: http://lkml.kernel.org/r/20181112160910.GA28440@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Oleg Nesterov and committed by Linus Torvalds
655c16a8 8099b047

+53 -53
+52 -53
fs/exec.c
··· 218 218 if (ret <= 0) 219 219 return NULL; 220 220 221 - if (write) { 222 - unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; 223 - unsigned long ptr_size, limit; 224 - 225 - /* 226 - * Since the stack will hold pointers to the strings, we 227 - * must account for them as well. 228 - * 229 - * The size calculation is the entire vma while each arg page is 230 - * built, so each time we get here it's calculating how far it 231 - * is currently (rather than each call being just the newly 232 - * added size from the arg page). As a result, we need to 233 - * always add the entire size of the pointers, so that on the 234 - * last call to get_arg_page() we'll actually have the entire 235 - * correct size. 236 - */ 237 - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); 238 - if (ptr_size > ULONG_MAX - size) 239 - goto fail; 240 - size += ptr_size; 241 - 242 - acct_arg_size(bprm, size / PAGE_SIZE); 243 - 244 - /* 245 - * We've historically supported up to 32 pages (ARG_MAX) 246 - * of argument strings even with small stacks 247 - */ 248 - if (size <= ARG_MAX) 249 - return page; 250 - 251 - /* 252 - * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM 253 - * (whichever is smaller) for the argv+env strings. 254 - * This ensures that: 255 - * - the remaining binfmt code will not run out of stack space, 256 - * - the program will have a reasonable amount of stack left 257 - * to work from. 
258 - */ 259 - limit = _STK_LIM / 4 * 3; 260 - limit = min(limit, bprm->rlim_stack.rlim_cur / 4); 261 - if (size > limit) 262 - goto fail; 263 - } 221 + if (write) 222 + acct_arg_size(bprm, vma_pages(bprm->vma)); 264 223 265 224 return page; 266 - 267 - fail: 268 - put_page(page); 269 - return NULL; 270 225 } 271 226 272 227 static void put_arg_page(struct page *page) ··· 447 492 return i; 448 493 } 449 494 495 + static int prepare_arg_pages(struct linux_binprm *bprm, 496 + struct user_arg_ptr argv, struct user_arg_ptr envp) 497 + { 498 + unsigned long limit, ptr_size; 499 + 500 + bprm->argc = count(argv, MAX_ARG_STRINGS); 501 + if (bprm->argc < 0) 502 + return bprm->argc; 503 + 504 + bprm->envc = count(envp, MAX_ARG_STRINGS); 505 + if (bprm->envc < 0) 506 + return bprm->envc; 507 + 508 + /* 509 + * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM 510 + * (whichever is smaller) for the argv+env strings. 511 + * This ensures that: 512 + * - the remaining binfmt code will not run out of stack space, 513 + * - the program will have a reasonable amount of stack left 514 + * to work from. 515 + */ 516 + limit = _STK_LIM / 4 * 3; 517 + limit = min(limit, bprm->rlim_stack.rlim_cur / 4); 518 + /* 519 + * We've historically supported up to 32 pages (ARG_MAX) 520 + * of argument strings even with small stacks 521 + */ 522 + limit = max_t(unsigned long, limit, ARG_MAX); 523 + /* 524 + * We must account for the size of all the argv and envp pointers to 525 + * the argv and envp strings, since they will also take up space in 526 + * the stack. They aren't stored until much later when we can't 527 + * signal to the parent that the child has run out of stack space. 528 + * Instead, calculate it here so it's possible to fail gracefully. 
529 + */ 530 + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); 531 + if (limit <= ptr_size) 532 + return -E2BIG; 533 + limit -= ptr_size; 534 + 535 + bprm->argmin = bprm->p - limit; 536 + return 0; 537 + } 538 + 450 539 /* 451 540 * 'copy_strings()' copies argument/environment strings from the old 452 541 * processes's memory to the new process's stack. The call to get_user_pages() ··· 526 527 pos = bprm->p; 527 528 str += len; 528 529 bprm->p -= len; 530 + #ifdef CONFIG_MMU 531 + if (bprm->p < bprm->argmin) 532 + goto out; 533 + #endif 529 534 530 535 while (len > 0) { 531 536 int offset, bytes_to_copy; ··· 1792 1789 if (retval) 1793 1790 goto out_unmark; 1794 1791 1795 - bprm->argc = count(argv, MAX_ARG_STRINGS); 1796 - if ((retval = bprm->argc) < 0) 1797 - goto out; 1798 - 1799 - bprm->envc = count(envp, MAX_ARG_STRINGS); 1800 - if ((retval = bprm->envc) < 0) 1792 + retval = prepare_arg_pages(bprm, argv, envp); 1793 + if (retval < 0) 1801 1794 goto out; 1802 1795 1803 1796 retval = prepare_binprm(bprm);
+1
include/linux/binfmts.h
··· 25 25 #endif 26 26 struct mm_struct *mm; 27 27 unsigned long p; /* current top of mem */ 28 + unsigned long argmin; /* rlimit marker for copy_strings() */ 28 29 unsigned int 29 30 /* 30 31 * True after the bprm_set_creds hook has been called once