Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 246de42cfc0abc4e25585f2dca53f8226f62391c 571 lines 13 kB view raw
1/* 2 * dev_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7#include <linux/device_cgroup.h> 8#include <linux/cgroup.h> 9#include <linux/ctype.h> 10#include <linux/list.h> 11#include <linux/uaccess.h> 12#include <linux/seq_file.h> 13 14#define ACC_MKNOD 1 15#define ACC_READ 2 16#define ACC_WRITE 4 17#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 18 19#define DEV_BLOCK 1 20#define DEV_CHAR 2 21#define DEV_ALL 4 /* this represents all devices */ 22 23/* 24 * whitelist locking rules: 25 * cgroup_lock() cannot be taken under dev_cgroup->lock. 26 * dev_cgroup->lock can be taken with or without cgroup_lock(). 27 * 28 * modifications always require cgroup_lock 29 * modifications to a list which is visible require the 30 * dev_cgroup->lock *and* cgroup_lock() 31 * walking the list requires dev_cgroup->lock or cgroup_lock(). 32 * 33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs 34 * a mutex, which the cgroup_lock() is. Since modifying 35 * a visible list requires both locks, either lock can be 36 * taken for walking the list. 37 */ 38 39struct dev_whitelist_item { 40 u32 major, minor; 41 short type; 42 short access; 43 struct list_head list; 44 struct rcu_head rcu; 45}; 46 47struct dev_cgroup { 48 struct cgroup_subsys_state css; 49 struct list_head whitelist; 50 spinlock_t lock; 51}; 52 53static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 54{ 55 return container_of(s, struct dev_cgroup, css); 56} 57 58static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 59{ 60 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 61} 62 63static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 64{ 65 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 66} 67 68struct cgroup_subsys devices_subsys; 69 70static int devcgroup_can_attach(struct cgroup_subsys *ss, 71 struct cgroup *new_cgroup, struct task_struct *task) 72{ 73 if (current != task && !capable(CAP_SYS_ADMIN)) 74 return -EPERM; 75 76 return 0; 77} 78 79/* 80 * called under cgroup_lock() 81 */ 82static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 83{ 84 struct dev_whitelist_item *wh, *tmp, *new; 85 86 list_for_each_entry(wh, orig, list) { 87 new = kmalloc(sizeof(*wh), GFP_KERNEL); 88 if (!new) 89 goto free_and_exit; 90 new->major = wh->major; 91 new->minor = wh->minor; 92 new->type = wh->type; 93 new->access = wh->access; 94 list_add_tail(&new->list, dest); 95 } 96 97 return 0; 98 99free_and_exit: 100 list_for_each_entry_safe(wh, tmp, dest, list) { 101 list_del(&wh->list); 102 kfree(wh); 103 } 104 return -ENOMEM; 105} 106 107/* Stupid prototype - don't bother combining existing entries */ 108/* 109 * called under cgroup_lock() 110 * since the list is visible to other tasks, we need the spinlock also 111 */ 112static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 113 struct dev_whitelist_item *wh) 114{ 115 struct dev_whitelist_item *whcopy, *walk; 116 117 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); 118 if (!whcopy) 119 return -ENOMEM; 120 121 memcpy(whcopy, wh, sizeof(*whcopy)); 122 spin_lock(&dev_cgroup->lock); 123 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 124 if (walk->type != wh->type) 125 continue; 126 if (walk->major != wh->major) 127 continue; 128 if (walk->minor != wh->minor) 129 continue; 130 131 walk->access |= wh->access; 132 kfree(whcopy); 133 whcopy = NULL; 134 } 135 136 if (whcopy != NULL) 137 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 138 spin_unlock(&dev_cgroup->lock); 139 return 0; 140} 141 142static void whitelist_item_free(struct rcu_head *rcu) 143{ 144 struct dev_whitelist_item *item; 145 146 item = container_of(rcu, struct dev_whitelist_item, rcu); 147 kfree(item); 148} 149 150/* 151 * called under cgroup_lock() 152 * since the list is visible to other tasks, we need the spinlock also 153 */ 154static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 155 struct dev_whitelist_item *wh) 156{ 157 struct dev_whitelist_item *walk, *tmp; 158 159 spin_lock(&dev_cgroup->lock); 160 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 161 if (walk->type == DEV_ALL) 162 goto remove; 163 if (walk->type != wh->type) 164 continue; 165 if (walk->major != ~0 && walk->major != wh->major) 166 continue; 167 if (walk->minor != ~0 && walk->minor != wh->minor) 168 continue; 169 170remove: 171 walk->access &= ~wh->access; 172 if (!walk->access) { 173 list_del_rcu(&walk->list); 174 call_rcu(&walk->rcu, whitelist_item_free); 175 } 176 } 177 spin_unlock(&dev_cgroup->lock); 178} 179 180/* 181 * called from kernel/cgroup.c with cgroup_lock() held. 182 */ 183static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 184 struct cgroup *cgroup) 185{ 186 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 187 struct cgroup *parent_cgroup; 188 int ret; 189 190 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 191 if (!dev_cgroup) 192 return ERR_PTR(-ENOMEM); 193 INIT_LIST_HEAD(&dev_cgroup->whitelist); 194 parent_cgroup = cgroup->parent; 195 196 if (parent_cgroup == NULL) { 197 struct dev_whitelist_item *wh; 198 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 199 if (!wh) { 200 kfree(dev_cgroup); 201 return ERR_PTR(-ENOMEM); 202 } 203 wh->minor = wh->major = ~0; 204 wh->type = DEV_ALL; 205 wh->access = ACC_MASK; 206 list_add(&wh->list, &dev_cgroup->whitelist); 207 } else { 208 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 209 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 210 &parent_dev_cgroup->whitelist); 211 if (ret) { 212 kfree(dev_cgroup); 213 return ERR_PTR(ret); 214 } 215 } 216 217 spin_lock_init(&dev_cgroup->lock); 218 return &dev_cgroup->css; 219} 220 221static void devcgroup_destroy(struct cgroup_subsys *ss, 222 struct cgroup *cgroup) 223{ 224 struct dev_cgroup *dev_cgroup; 225 struct dev_whitelist_item *wh, *tmp; 226 227 dev_cgroup = cgroup_to_devcgroup(cgroup); 228 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 229 list_del(&wh->list); 230 kfree(wh); 231 } 232 kfree(dev_cgroup); 233} 234 235#define DEVCG_ALLOW 1 236#define DEVCG_DENY 2 237#define DEVCG_LIST 3 238 239#define MAJMINLEN 13 240#define ACCLEN 4 241 242static void set_access(char *acc, short access) 243{ 244 int idx = 0; 245 memset(acc, 0, ACCLEN); 246 if (access & ACC_READ) 247 acc[idx++] = 'r'; 248 if (access & ACC_WRITE) 249 acc[idx++] = 'w'; 250 if (access & ACC_MKNOD) 251 acc[idx++] = 'm'; 252} 253 254static char type_to_char(short type) 255{ 256 if (type == DEV_ALL) 257 return 'a'; 258 if (type == DEV_CHAR) 259 return 'c'; 260 if (type == DEV_BLOCK) 261 return 'b'; 262 return 'X'; 263} 264 265static void set_majmin(char *str, unsigned m) 266{ 267 if (m == ~0) 268 strcpy(str, "*"); 269 else 270 sprintf(str, "%u", m); 271} 272 273static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 274 struct seq_file *m) 275{ 276 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 277 struct dev_whitelist_item *wh; 278 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 279 280 rcu_read_lock(); 281 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 282 set_access(acc, wh->access); 283 set_majmin(maj, wh->major); 284 set_majmin(min, wh->minor); 285 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 286 maj, min, acc); 287 } 288 rcu_read_unlock(); 289 290 return 0; 291} 292 293/* 294 * may_access_whitelist: 295 * does the access granted to dev_cgroup c contain the access 296 * requested in whitelist item refwh. 297 * return 1 if yes, 0 if no. 298 * call with c->lock held 299 */ 300static int may_access_whitelist(struct dev_cgroup *c, 301 struct dev_whitelist_item *refwh) 302{ 303 struct dev_whitelist_item *whitem; 304 305 list_for_each_entry(whitem, &c->whitelist, list) { 306 if (whitem->type & DEV_ALL) 307 return 1; 308 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 309 continue; 310 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 311 continue; 312 if (whitem->major != ~0 && whitem->major != refwh->major) 313 continue; 314 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 315 continue; 316 if (refwh->access & (~whitem->access)) 317 continue; 318 return 1; 319 } 320 return 0; 321} 322 323/* 324 * parent_has_perm: 325 * when adding a new allow rule to a device whitelist, the rule 326 * must be allowed in the parent device 327 */ 328static int parent_has_perm(struct dev_cgroup *childcg, 329 struct dev_whitelist_item *wh) 330{ 331 struct cgroup *pcg = childcg->css.cgroup->parent; 332 struct dev_cgroup *parent; 333 int ret; 334 335 if (!pcg) 336 return 1; 337 parent = cgroup_to_devcgroup(pcg); 338 spin_lock(&parent->lock); 339 ret = may_access_whitelist(parent, wh); 340 spin_unlock(&parent->lock); 341 return ret; 342} 343 344/* 345 * Modify the whitelist using allow/deny rules. 346 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 347 * so we can give a container CAP_MKNOD to let it create devices but not 348 * modify the whitelist. 349 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 350 * us to also grant CAP_SYS_ADMIN to containers without giving away the 351 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 352 * 353 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 354 * new access is only allowed if you're in the top-level cgroup, or your 355 * parent cgroup has the access you're asking for. 356 */ 357static int devcgroup_update_access(struct dev_cgroup *devcgroup, 358 int filetype, const char *buffer) 359{ 360 struct dev_cgroup *cur_devcgroup; 361 const char *b; 362 char *endp; 363 int retval = 0, count; 364 struct dev_whitelist_item wh; 365 366 if (!capable(CAP_SYS_ADMIN)) 367 return -EPERM; 368 369 cur_devcgroup = task_devcgroup(current); 370 371 memset(&wh, 0, sizeof(wh)); 372 b = buffer; 373 374 switch (*b) { 375 case 'a': 376 wh.type = DEV_ALL; 377 wh.access = ACC_MASK; 378 wh.major = ~0; 379 wh.minor = ~0; 380 goto handle; 381 case 'b': 382 wh.type = DEV_BLOCK; 383 break; 384 case 'c': 385 wh.type = DEV_CHAR; 386 break; 387 default: 388 return -EINVAL; 389 } 390 b++; 391 if (!isspace(*b)) 392 return -EINVAL; 393 b++; 394 if (*b == '*') { 395 wh.major = ~0; 396 b++; 397 } else if (isdigit(*b)) { 398 wh.major = simple_strtoul(b, &endp, 10); 399 b = endp; 400 } else { 401 return -EINVAL; 402 } 403 if (*b != ':') 404 return -EINVAL; 405 b++; 406 407 /* read minor */ 408 if (*b == '*') { 409 wh.minor = ~0; 410 b++; 411 } else if (isdigit(*b)) { 412 wh.minor = simple_strtoul(b, &endp, 10); 413 b = endp; 414 } else { 415 return -EINVAL; 416 } 417 if (!isspace(*b)) 418 return -EINVAL; 419 for (b++, count = 0; count < 3; count++, b++) { 420 switch (*b) { 421 case 'r': 422 wh.access |= ACC_READ; 423 break; 424 case 'w': 425 wh.access |= ACC_WRITE; 426 break; 427 case 'm': 428 wh.access |= ACC_MKNOD; 429 break; 430 case '\n': 431 case '\0': 432 count = 3; 433 break; 434 default: 435 return -EINVAL; 436 } 437 } 438 439handle: 440 retval = 0; 441 switch (filetype) { 442 case DEVCG_ALLOW: 443 if (!parent_has_perm(devcgroup, &wh)) 444 return -EPERM; 445 return dev_whitelist_add(devcgroup, &wh); 446 case DEVCG_DENY: 447 dev_whitelist_rm(devcgroup, &wh); 448 break; 449 default: 450 return -EINVAL; 451 } 452 return 0; 453} 454 455static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 456 const char *buffer) 457{ 458 int retval; 459 if (!cgroup_lock_live_group(cgrp)) 460 return -ENODEV; 461 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 462 cft->private, buffer); 463 cgroup_unlock(); 464 return retval; 465} 466 467static struct cftype dev_cgroup_files[] = { 468 { 469 .name = "allow", 470 .write_string = devcgroup_access_write, 471 .private = DEVCG_ALLOW, 472 }, 473 { 474 .name = "deny", 475 .write_string = devcgroup_access_write, 476 .private = DEVCG_DENY, 477 }, 478 { 479 .name = "list", 480 .read_seq_string = devcgroup_seq_read, 481 .private = DEVCG_LIST, 482 }, 483}; 484 485static int devcgroup_populate(struct cgroup_subsys *ss, 486 struct cgroup *cgroup) 487{ 488 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 489 ARRAY_SIZE(dev_cgroup_files)); 490} 491 492struct cgroup_subsys devices_subsys = { 493 .name = "devices", 494 .can_attach = devcgroup_can_attach, 495 .create = devcgroup_create, 496 .destroy = devcgroup_destroy, 497 .populate = devcgroup_populate, 498 .subsys_id = devices_subsys_id, 499}; 500 501int devcgroup_inode_permission(struct inode *inode, int mask) 502{ 503 struct dev_cgroup *dev_cgroup; 504 struct dev_whitelist_item *wh; 505 506 dev_t device = inode->i_rdev; 507 if (!device) 508 return 0; 509 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 510 return 0; 511 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 512 devices_subsys_id)); 513 if (!dev_cgroup) 514 return 0; 515 516 rcu_read_lock(); 517 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 518 if (wh->type & DEV_ALL) 519 goto acc_check; 520 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 521 continue; 522 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 523 continue; 524 if (wh->major != ~0 && wh->major != imajor(inode)) 525 continue; 526 if (wh->minor != ~0 && wh->minor != iminor(inode)) 527 continue; 528acc_check: 529 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 530 continue; 531 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 532 continue; 533 rcu_read_unlock(); 534 return 0; 535 } 536 rcu_read_unlock(); 537 538 return -EPERM; 539} 540 541int devcgroup_inode_mknod(int mode, dev_t dev) 542{ 543 struct dev_cgroup *dev_cgroup; 544 struct dev_whitelist_item *wh; 545 546 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 547 devices_subsys_id)); 548 if (!dev_cgroup) 549 return 0; 550 551 rcu_read_lock(); 552 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 553 if (wh->type & DEV_ALL) 554 goto acc_check; 555 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 556 continue; 557 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 558 continue; 559 if (wh->major != ~0 && wh->major != MAJOR(dev)) 560 continue; 561 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 562 continue; 563acc_check: 564 if (!(wh->access & ACC_MKNOD)) 565 continue; 566 rcu_read_unlock(); 567 return 0; 568 } 569 rcu_read_unlock(); 570 return -EPERM; 571}