Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

seccomp: split filter prep from check and apply

In preparation for adding seccomp locking, move filter creation away
from where it is checked and applied. This will allow for locking where
no memory allocation is happening. The validation, filter attachment,
and seccomp mode setting can all happen under the future locks.

For extreme defensiveness, I've added a BUG_ON check for the calculated
size of the buffer allocation in case BPF_MAXINSN ever changes, which
shouldn't ever happen. The compiler should actually optimize out this
check since the test above it makes it impossible.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>

Kees Cook c8bee430 1d4457f9

+67 -30
+67 -30
kernel/seccomp.c
··· 18 18 #include <linux/compat.h> 19 19 #include <linux/sched.h> 20 20 #include <linux/seccomp.h> 21 + #include <linux/slab.h> 21 22 #include <linux/syscalls.h> 22 23 23 24 /* #define SECCOMP_DEBUG 1 */ ··· 28 27 #include <linux/filter.h> 29 28 #include <linux/ptrace.h> 30 29 #include <linux/security.h> 31 - #include <linux/slab.h> 32 30 #include <linux/tracehook.h> 33 31 #include <linux/uaccess.h> 34 32 ··· 213 213 214 214 #ifdef CONFIG_SECCOMP_FILTER 215 215 /** 216 - * seccomp_attach_filter: Attaches a seccomp filter to current. 216 + * seccomp_prepare_filter: Prepares a seccomp filter for use. 217 217 * @fprog: BPF program to install 218 218 * 219 - * Returns 0 on success or an errno on failure. 219 + * Returns filter on success or an ERR_PTR on failure. 220 220 */ 221 - static long seccomp_attach_filter(struct sock_fprog *fprog) 221 + static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) 222 222 { 223 223 struct seccomp_filter *filter; 224 - unsigned long fp_size = fprog->len * sizeof(struct sock_filter); 225 - unsigned long total_insns = fprog->len; 224 + unsigned long fp_size; 226 225 struct sock_filter *fp; 227 226 int new_len; 228 227 long ret; 229 228 230 229 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) 231 - return -EINVAL; 232 - 233 - for (filter = current->seccomp.filter; filter; filter = filter->prev) 234 - total_insns += filter->prog->len + 4; /* include a 4 instr penalty */ 235 - if (total_insns > MAX_INSNS_PER_PATH) 236 - return -ENOMEM; 230 + return ERR_PTR(-EINVAL); 231 + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); 232 + fp_size = fprog->len * sizeof(struct sock_filter); 237 233 238 234 /* 239 235 * Installing a seccomp filter requires that the task has ··· 240 244 if (!task_no_new_privs(current) && 241 245 security_capable_noaudit(current_cred(), current_user_ns(), 242 246 CAP_SYS_ADMIN) != 0) 243 - return -EACCES; 247 + return ERR_PTR(-EACCES); 244 248 245 249 fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); 246 250 if (!fp) 247 - return -ENOMEM; 251 + return ERR_PTR(-ENOMEM); 248 252 249 253 /* Copy the instructions from fprog. */ 250 254 ret = -EFAULT; ··· 288 292 289 293 sk_filter_select_runtime(filter->prog); 290 294 291 - /* 292 - * If there is an existing filter, make it the prev and don't drop its 293 - * task reference. 294 - */ 295 - filter->prev = current->seccomp.filter; 296 - current->seccomp.filter = filter; 297 - return 0; 295 + return filter; 298 296 299 297 free_filter_prog: 300 298 kfree(filter->prog); ··· 296 306 kfree(filter); 297 307 free_prog: 298 308 kfree(fp); 299 - return ret; 309 + return ERR_PTR(ret); 300 310 } 301 311 302 312 /** 303 - * seccomp_attach_user_filter - attaches a user-supplied sock_fprog 313 + * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog 304 314 * @user_filter: pointer to the user data containing a sock_fprog. 305 315 * 306 316 * Returns 0 on success and non-zero otherwise. 307 317 */ 308 - static long seccomp_attach_user_filter(const char __user *user_filter) 318 + static struct seccomp_filter * 319 + seccomp_prepare_user_filter(const char __user *user_filter) 309 320 { 310 321 struct sock_fprog fprog; 311 - long ret = -EFAULT; 322 + struct seccomp_filter *filter = ERR_PTR(-EFAULT); 312 323 313 324 #ifdef CONFIG_COMPAT 314 325 if (is_compat_task()) { ··· 322 331 #endif 323 332 if (copy_from_user(&fprog, user_filter, sizeof(fprog))) 324 333 goto out; 325 - ret = seccomp_attach_filter(&fprog); 334 + filter = seccomp_prepare_filter(&fprog); 326 335 out: 327 - return ret; 336 + return filter; 337 + } 338 + 339 + /** 340 + * seccomp_attach_filter: validate and attach filter 341 + * @flags: flags to change filter behavior 342 + * @filter: seccomp filter to add to the current process 343 + * 344 + * Returns 0 on success, -ve on error. 345 + */ 346 + static long seccomp_attach_filter(unsigned int flags, 347 + struct seccomp_filter *filter) 348 + { 349 + unsigned long total_insns; 350 + struct seccomp_filter *walker; 351 + 352 + /* Validate resulting filter length. */ 353 + total_insns = filter->prog->len; 354 + for (walker = current->seccomp.filter; walker; walker = walker->prev) 355 + total_insns += walker->prog->len + 4; /* 4 instr penalty */ 356 + if (total_insns > MAX_INSNS_PER_PATH) 357 + return -ENOMEM; 358 + 359 + /* 360 + * If there is an existing filter, make it the prev and don't drop its 361 + * task reference. 362 + */ 363 + filter->prev = current->seccomp.filter; 364 + current->seccomp.filter = filter; 365 + 366 + return 0; 328 367 } 329 368 330 369 /* get_seccomp_filter - increments the reference count of the filter on @tsk */ ··· 367 346 atomic_inc(&orig->usage); 368 347 } 369 348 349 + static inline void seccomp_filter_free(struct seccomp_filter *filter) 350 + { 351 + if (filter) { 352 + sk_filter_free(filter->prog); 353 + kfree(filter); 354 + } 355 + } 356 + 370 357 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ 371 358 void put_seccomp_filter(struct task_struct *tsk) 372 359 { ··· 383 354 while (orig && atomic_dec_and_test(&orig->usage)) { 384 355 struct seccomp_filter *freeme = orig; 385 356 orig = orig->prev; 386 - sk_filter_free(freeme->prog); 387 - kfree(freeme); 357 + seccomp_filter_free(freeme); 388 358 } 389 359 } 390 360 ··· 561 533 const char __user *filter) 562 534 { 563 535 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; 536 + struct seccomp_filter *prepared = NULL; 564 537 long ret = -EINVAL; 565 538 566 539 /* Validate flags. */ 567 540 if (flags != 0) 568 541 goto out; 569 542 543 + /* Prepare the new filter before holding any locks. */ 544 + prepared = seccomp_prepare_user_filter(filter); 545 + if (IS_ERR(prepared)) 546 + return PTR_ERR(prepared); 547 + 570 548 if (!seccomp_may_assign_mode(seccomp_mode)) 571 549 goto out; 572 550 573 - ret = seccomp_attach_user_filter(filter); 551 + ret = seccomp_attach_filter(flags, prepared); 574 552 if (ret) 575 553 goto out; 554 + /* Do not free the successfully attached filter. */ 555 + prepared = NULL; 576 556 577 557 seccomp_assign_mode(seccomp_mode); 578 558 out: 559 + seccomp_filter_free(prepared); 579 560 return ret; 580 561 } 581 562 #else