at v2.6.26 18 kB view raw
1/* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10#include <linux/capability.h> 11#include <linux/module.h> 12#include <linux/init.h> 13#include <linux/kernel.h> 14#include <linux/security.h> 15#include <linux/file.h> 16#include <linux/mm.h> 17#include <linux/mman.h> 18#include <linux/pagemap.h> 19#include <linux/swap.h> 20#include <linux/skbuff.h> 21#include <linux/netlink.h> 22#include <linux/ptrace.h> 23#include <linux/xattr.h> 24#include <linux/hugetlb.h> 25#include <linux/mount.h> 26#include <linux/sched.h> 27#include <linux/prctl.h> 28#include <linux/securebits.h> 29 30int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 31{ 32 NETLINK_CB(skb).eff_cap = current->cap_effective; 33 return 0; 34} 35 36int cap_netlink_recv(struct sk_buff *skb, int cap) 37{ 38 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 39 return -EPERM; 40 return 0; 41} 42 43EXPORT_SYMBOL(cap_netlink_recv); 44 45/* 46 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 47 * function. That is, it has the reverse semantics: cap_capable() 48 * returns 0 when a task has a capability, but the kernel's capable() 49 * returns 1 for this case. 50 */ 51int cap_capable (struct task_struct *tsk, int cap) 52{ 53 /* Derived from include/linux/sched.h:capable. */ 54 if (cap_raised(tsk->cap_effective, cap)) 55 return 0; 56 return -EPERM; 57} 58 59int cap_settime(struct timespec *ts, struct timezone *tz) 60{ 61 if (!capable(CAP_SYS_TIME)) 62 return -EPERM; 63 return 0; 64} 65 66int cap_ptrace (struct task_struct *parent, struct task_struct *child) 67{ 68 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 69 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 70 !__capable(parent, CAP_SYS_PTRACE)) 71 return -EPERM; 72 return 0; 73} 74 75int cap_capget (struct task_struct *target, kernel_cap_t *effective, 76 kernel_cap_t *inheritable, kernel_cap_t *permitted) 77{ 78 /* Derived from kernel/capability.c:sys_capget. */ 79 *effective = target->cap_effective; 80 *inheritable = target->cap_inheritable; 81 *permitted = target->cap_permitted; 82 return 0; 83} 84 85#ifdef CONFIG_SECURITY_FILE_CAPABILITIES 86 87static inline int cap_block_setpcap(struct task_struct *target) 88{ 89 /* 90 * No support for remote process capability manipulation with 91 * filesystem capability support. 92 */ 93 return (target != current); 94} 95 96static inline int cap_inh_is_capped(void) 97{ 98 /* 99 * Return 1 if changes to the inheritable set are limited 100 * to the old permitted set. That is, if the current task 101 * does *not* possess the CAP_SETPCAP capability. 102 */ 103 return (cap_capable(current, CAP_SETPCAP) != 0); 104} 105 106static inline int cap_limit_ptraced_target(void) { return 1; } 107 108#else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 109 110static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 111static inline int cap_inh_is_capped(void) { return 1; } 112static inline int cap_limit_ptraced_target(void) 113{ 114 return !capable(CAP_SETPCAP); 115} 116 117#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 118 119int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 120 kernel_cap_t *inheritable, kernel_cap_t *permitted) 121{ 122 if (cap_block_setpcap(target)) { 123 return -EPERM; 124 } 125 if (cap_inh_is_capped() 126 && !cap_issubset(*inheritable, 127 cap_combine(target->cap_inheritable, 128 current->cap_permitted))) { 129 /* incapable of using this inheritable set */ 130 return -EPERM; 131 } 132 if (!cap_issubset(*inheritable, 133 cap_combine(target->cap_inheritable, 134 current->cap_bset))) { 135 /* no new pI capabilities outside bounding set */ 136 return -EPERM; 137 } 138 139 /* verify restrictions on target's new Permitted set */ 140 if (!cap_issubset (*permitted, 141 cap_combine (target->cap_permitted, 142 current->cap_permitted))) { 143 return -EPERM; 144 } 145 146 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 147 if (!cap_issubset (*effective, *permitted)) { 148 return -EPERM; 149 } 150 151 return 0; 152} 153 154void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 155 kernel_cap_t *inheritable, kernel_cap_t *permitted) 156{ 157 target->cap_effective = *effective; 158 target->cap_inheritable = *inheritable; 159 target->cap_permitted = *permitted; 160} 161 162static inline void bprm_clear_caps(struct linux_binprm *bprm) 163{ 164 cap_clear(bprm->cap_inheritable); 165 cap_clear(bprm->cap_permitted); 166 bprm->cap_effective = false; 167} 168 169#ifdef CONFIG_SECURITY_FILE_CAPABILITIES 170 171int cap_inode_need_killpriv(struct dentry *dentry) 172{ 173 struct inode *inode = dentry->d_inode; 174 int error; 175 176 if (!inode->i_op || !inode->i_op->getxattr) 177 return 0; 178 179 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 180 if (error <= 0) 181 return 0; 182 return 1; 183} 184 185int cap_inode_killpriv(struct dentry *dentry) 186{ 187 struct inode *inode = dentry->d_inode; 188 189 if (!inode->i_op || !inode->i_op->removexattr) 190 return 0; 191 192 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 193} 194 195static inline int cap_from_disk(struct vfs_cap_data *caps, 196 struct linux_binprm *bprm, unsigned size) 197{ 198 __u32 magic_etc; 199 unsigned tocopy, i; 200 201 if (size < sizeof(magic_etc)) 202 return -EINVAL; 203 204 magic_etc = le32_to_cpu(caps->magic_etc); 205 206 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 207 case VFS_CAP_REVISION_1: 208 if (size != XATTR_CAPS_SZ_1) 209 return -EINVAL; 210 tocopy = VFS_CAP_U32_1; 211 break; 212 case VFS_CAP_REVISION_2: 213 if (size != XATTR_CAPS_SZ_2) 214 return -EINVAL; 215 tocopy = VFS_CAP_U32_2; 216 break; 217 default: 218 return -EINVAL; 219 } 220 221 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 222 bprm->cap_effective = true; 223 } else { 224 bprm->cap_effective = false; 225 } 226 227 for (i = 0; i < tocopy; ++i) { 228 bprm->cap_permitted.cap[i] = 229 le32_to_cpu(caps->data[i].permitted); 230 bprm->cap_inheritable.cap[i] = 231 le32_to_cpu(caps->data[i].inheritable); 232 } 233 while (i < VFS_CAP_U32) { 234 bprm->cap_permitted.cap[i] = 0; 235 bprm->cap_inheritable.cap[i] = 0; 236 i++; 237 } 238 239 return 0; 240} 241 242/* Locate any VFS capabilities: */ 243static int get_file_caps(struct linux_binprm *bprm) 244{ 245 struct dentry *dentry; 246 int rc = 0; 247 struct vfs_cap_data vcaps; 248 struct inode *inode; 249 250 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 251 bprm_clear_caps(bprm); 252 return 0; 253 } 254 255 dentry = dget(bprm->file->f_dentry); 256 inode = dentry->d_inode; 257 if (!inode->i_op || !inode->i_op->getxattr) 258 goto out; 259 260 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 261 XATTR_CAPS_SZ); 262 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 263 /* no data, that's ok */ 264 rc = 0; 265 goto out; 266 } 267 if (rc < 0) 268 goto out; 269 270 rc = cap_from_disk(&vcaps, bprm, rc); 271 if (rc) 272 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 273 __func__, rc, bprm->filename); 274 275out: 276 dput(dentry); 277 if (rc) 278 bprm_clear_caps(bprm); 279 280 return rc; 281} 282 283#else 284int cap_inode_need_killpriv(struct dentry *dentry) 285{ 286 return 0; 287} 288 289int cap_inode_killpriv(struct dentry *dentry) 290{ 291 return 0; 292} 293 294static inline int get_file_caps(struct linux_binprm *bprm) 295{ 296 bprm_clear_caps(bprm); 297 return 0; 298} 299#endif 300 301int cap_bprm_set_security (struct linux_binprm *bprm) 302{ 303 int ret; 304 305 ret = get_file_caps(bprm); 306 if (ret) 307 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n", 308 __func__, ret, bprm->filename); 309 310 /* To support inheritance of root-permissions and suid-root 311 * executables under compatibility mode, we raise all three 312 * capability sets for the file. 313 * 314 * If only the real uid is 0, we only raise the inheritable 315 * and permitted sets of the executable file. 316 */ 317 318 if (!issecure (SECURE_NOROOT)) { 319 if (bprm->e_uid == 0 || current->uid == 0) { 320 cap_set_full (bprm->cap_inheritable); 321 cap_set_full (bprm->cap_permitted); 322 } 323 if (bprm->e_uid == 0) 324 bprm->cap_effective = true; 325 } 326 327 return ret; 328} 329 330void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 331{ 332 /* Derived from fs/exec.c:compute_creds. */ 333 kernel_cap_t new_permitted, working; 334 335 new_permitted = cap_intersect(bprm->cap_permitted, 336 current->cap_bset); 337 working = cap_intersect(bprm->cap_inheritable, 338 current->cap_inheritable); 339 new_permitted = cap_combine(new_permitted, working); 340 341 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 342 !cap_issubset (new_permitted, current->cap_permitted)) { 343 set_dumpable(current->mm, suid_dumpable); 344 current->pdeath_signal = 0; 345 346 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 347 if (!capable(CAP_SETUID)) { 348 bprm->e_uid = current->uid; 349 bprm->e_gid = current->gid; 350 } 351 if (cap_limit_ptraced_target()) { 352 new_permitted = 353 cap_intersect(new_permitted, 354 current->cap_permitted); 355 } 356 } 357 } 358 359 current->suid = current->euid = current->fsuid = bprm->e_uid; 360 current->sgid = current->egid = current->fsgid = bprm->e_gid; 361 362 /* For init, we want to retain the capabilities set 363 * in the init_task struct. Thus we skip the usual 364 * capability rules */ 365 if (!is_global_init(current)) { 366 current->cap_permitted = new_permitted; 367 if (bprm->cap_effective) 368 current->cap_effective = new_permitted; 369 else 370 cap_clear(current->cap_effective); 371 } 372 373 /* AUD: Audit candidate if current->cap_effective is set */ 374 375 current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); 376} 377 378int cap_bprm_secureexec (struct linux_binprm *bprm) 379{ 380 if (current->uid != 0) { 381 if (bprm->cap_effective) 382 return 1; 383 if (!cap_isclear(bprm->cap_permitted)) 384 return 1; 385 if (!cap_isclear(bprm->cap_inheritable)) 386 return 1; 387 } 388 389 return (current->euid != current->uid || 390 current->egid != current->gid); 391} 392 393int cap_inode_setxattr(struct dentry *dentry, const char *name, 394 const void *value, size_t size, int flags) 395{ 396 if (!strcmp(name, XATTR_NAME_CAPS)) { 397 if (!capable(CAP_SETFCAP)) 398 return -EPERM; 399 return 0; 400 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 401 sizeof(XATTR_SECURITY_PREFIX) - 1) && 402 !capable(CAP_SYS_ADMIN)) 403 return -EPERM; 404 return 0; 405} 406 407int cap_inode_removexattr(struct dentry *dentry, const char *name) 408{ 409 if (!strcmp(name, XATTR_NAME_CAPS)) { 410 if (!capable(CAP_SETFCAP)) 411 return -EPERM; 412 return 0; 413 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 414 sizeof(XATTR_SECURITY_PREFIX) - 1) && 415 !capable(CAP_SYS_ADMIN)) 416 return -EPERM; 417 return 0; 418} 419 420/* moved from kernel/sys.c. */ 421/* 422 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 423 * a process after a call to setuid, setreuid, or setresuid. 424 * 425 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 426 * {r,e,s}uid != 0, the permitted and effective capabilities are 427 * cleared. 428 * 429 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 430 * capabilities of the process are cleared. 431 * 432 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 433 * capabilities are set to the permitted capabilities. 434 * 435 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 436 * never happen. 437 * 438 * -astor 439 * 440 * cevans - New behaviour, Oct '99 441 * A process may, via prctl(), elect to keep its capabilities when it 442 * calls setuid() and switches away from uid==0. Both permitted and 443 * effective sets will be retained. 444 * Without this change, it was impossible for a daemon to drop only some 445 * of its privilege. The call to setuid(!=0) would drop all privileges! 446 * Keeping uid 0 is not an option because uid 0 owns too many vital 447 * files.. 448 * Thanks to Olaf Kirch and Peter Benie for spotting this. 449 */ 450static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 451 int old_suid) 452{ 453 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 454 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 455 !issecure(SECURE_KEEP_CAPS)) { 456 cap_clear (current->cap_permitted); 457 cap_clear (current->cap_effective); 458 } 459 if (old_euid == 0 && current->euid != 0) { 460 cap_clear (current->cap_effective); 461 } 462 if (old_euid != 0 && current->euid == 0) { 463 current->cap_effective = current->cap_permitted; 464 } 465} 466 467int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 468 int flags) 469{ 470 switch (flags) { 471 case LSM_SETID_RE: 472 case LSM_SETID_ID: 473 case LSM_SETID_RES: 474 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 475 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 476 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 477 } 478 break; 479 case LSM_SETID_FS: 480 { 481 uid_t old_fsuid = old_ruid; 482 483 /* Copied from kernel/sys.c:setfsuid. */ 484 485 /* 486 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 487 * if not, we might be a bit too harsh here. 488 */ 489 490 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 491 if (old_fsuid == 0 && current->fsuid != 0) { 492 current->cap_effective = 493 cap_drop_fs_set( 494 current->cap_effective); 495 } 496 if (old_fsuid != 0 && current->fsuid == 0) { 497 current->cap_effective = 498 cap_raise_fs_set( 499 current->cap_effective, 500 current->cap_permitted); 501 } 502 } 503 break; 504 } 505 default: 506 return -EINVAL; 507 } 508 509 return 0; 510} 511 512#ifdef CONFIG_SECURITY_FILE_CAPABILITIES 513/* 514 * Rationale: code calling task_setscheduler, task_setioprio, and 515 * task_setnice, assumes that 516 * . if capable(cap_sys_nice), then those actions should be allowed 517 * . if not capable(cap_sys_nice), but acting on your own processes, 518 * then those actions should be allowed 519 * This is insufficient now since you can call code without suid, but 520 * yet with increased caps. 521 * So we check for increased caps on the target process. 522 */ 523static inline int cap_safe_nice(struct task_struct *p) 524{ 525 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 526 !__capable(current, CAP_SYS_NICE)) 527 return -EPERM; 528 return 0; 529} 530 531int cap_task_setscheduler (struct task_struct *p, int policy, 532 struct sched_param *lp) 533{ 534 return cap_safe_nice(p); 535} 536 537int cap_task_setioprio (struct task_struct *p, int ioprio) 538{ 539 return cap_safe_nice(p); 540} 541 542int cap_task_setnice (struct task_struct *p, int nice) 543{ 544 return cap_safe_nice(p); 545} 546 547/* 548 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 549 * done without task_capability_lock() because it introduces 550 * no new races - i.e. only another task doing capget() on 551 * this task could get inconsistent info. There can be no 552 * racing writer bc a task can only change its own caps. 553 */ 554static long cap_prctl_drop(unsigned long cap) 555{ 556 if (!capable(CAP_SETPCAP)) 557 return -EPERM; 558 if (!cap_valid(cap)) 559 return -EINVAL; 560 cap_lower(current->cap_bset, cap); 561 return 0; 562} 563 564#else 565int cap_task_setscheduler (struct task_struct *p, int policy, 566 struct sched_param *lp) 567{ 568 return 0; 569} 570int cap_task_setioprio (struct task_struct *p, int ioprio) 571{ 572 return 0; 573} 574int cap_task_setnice (struct task_struct *p, int nice) 575{ 576 return 0; 577} 578#endif 579 580int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 581 unsigned long arg4, unsigned long arg5, long *rc_p) 582{ 583 long error = 0; 584 585 switch (option) { 586 case PR_CAPBSET_READ: 587 if (!cap_valid(arg2)) 588 error = -EINVAL; 589 else 590 error = !!cap_raised(current->cap_bset, arg2); 591 break; 592#ifdef CONFIG_SECURITY_FILE_CAPABILITIES 593 case PR_CAPBSET_DROP: 594 error = cap_prctl_drop(arg2); 595 break; 596 597 /* 598 * The next four prctl's remain to assist with transitioning a 599 * system from legacy UID=0 based privilege (when filesystem 600 * capabilities are not in use) to a system using filesystem 601 * capabilities only - as the POSIX.1e draft intended. 602 * 603 * Note: 604 * 605 * PR_SET_SECUREBITS = 606 * issecure_mask(SECURE_KEEP_CAPS_LOCKED) 607 * | issecure_mask(SECURE_NOROOT) 608 * | issecure_mask(SECURE_NOROOT_LOCKED) 609 * | issecure_mask(SECURE_NO_SETUID_FIXUP) 610 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) 611 * 612 * will ensure that the current process and all of its 613 * children will be locked into a pure 614 * capability-based-privilege environment. 615 */ 616 case PR_SET_SECUREBITS: 617 if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) 618 & (current->securebits ^ arg2)) /*[1]*/ 619 || ((current->securebits & SECURE_ALL_LOCKS 620 & ~arg2)) /*[2]*/ 621 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 622 || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ 623 /* 624 * [1] no changing of bits that are locked 625 * [2] no unlocking of locks 626 * [3] no setting of unsupported bits 627 * [4] doing anything requires privilege (go read about 628 * the "sendmail capabilities bug") 629 */ 630 error = -EPERM; /* cannot change a locked bit */ 631 } else { 632 current->securebits = arg2; 633 } 634 break; 635 case PR_GET_SECUREBITS: 636 error = current->securebits; 637 break; 638 639#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 640 641 case PR_GET_KEEPCAPS: 642 if (issecure(SECURE_KEEP_CAPS)) 643 error = 1; 644 break; 645 case PR_SET_KEEPCAPS: 646 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ 647 error = -EINVAL; 648 else if (issecure(SECURE_KEEP_CAPS_LOCKED)) 649 error = -EPERM; 650 else if (arg2) 651 current->securebits |= issecure_mask(SECURE_KEEP_CAPS); 652 else 653 current->securebits &= 654 ~issecure_mask(SECURE_KEEP_CAPS); 655 break; 656 657 default: 658 /* No functionality available - continue with default */ 659 return 0; 660 } 661 662 /* Functionality provided */ 663 *rc_p = error; 664 return 1; 665} 666 667void cap_task_reparent_to_init (struct task_struct *p) 668{ 669 cap_set_init_eff(p->cap_effective); 670 cap_clear(p->cap_inheritable); 671 cap_set_full(p->cap_permitted); 672 p->securebits = SECUREBITS_DEFAULT; 673 return; 674} 675 676int cap_syslog (int type) 677{ 678 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 679 return -EPERM; 680 return 0; 681} 682 683int cap_vm_enough_memory(struct mm_struct *mm, long pages) 684{ 685 int cap_sys_admin = 0; 686 687 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 688 cap_sys_admin = 1; 689 return __vm_enough_memory(mm, pages, cap_sys_admin); 690} 691