Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at d91dfbb41bb2e9bdbfbd2cc7078ed7436eab027a 549 lines 12 kB view raw
1/* 2 * device_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7#include <linux/device_cgroup.h> 8#include <linux/cgroup.h> 9#include <linux/ctype.h> 10#include <linux/list.h> 11#include <linux/uaccess.h> 12#include <linux/seq_file.h> 13#include <linux/rcupdate.h> 14#include <linux/mutex.h> 15 16#define ACC_MKNOD 1 17#define ACC_READ 2 18#define ACC_WRITE 4 19#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 20 21#define DEV_BLOCK 1 22#define DEV_CHAR 2 23#define DEV_ALL 4 /* this represents all devices */ 24 25static DEFINE_MUTEX(devcgroup_mutex); 26 27/* 28 * whitelist locking rules: 29 * hold devcgroup_mutex for update/read. 30 * hold rcu_read_lock() for read. 31 */ 32 33struct dev_whitelist_item { 34 u32 major, minor; 35 short type; 36 short access; 37 struct list_head list; 38 struct rcu_head rcu; 39}; 40 41struct dev_cgroup { 42 struct cgroup_subsys_state css; 43 struct list_head whitelist; 44}; 45 46static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 47{ 48 return container_of(s, struct dev_cgroup, css); 49} 50 51static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 52{ 53 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 54} 55 56static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 57{ 58 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 59} 60 61struct cgroup_subsys devices_subsys; 62 63static int devcgroup_can_attach(struct cgroup_subsys *ss, 64 struct cgroup *new_cgroup, struct task_struct *task) 65{ 66 if (current != task && !capable(CAP_SYS_ADMIN)) 67 return -EPERM; 68 69 return 0; 70} 71 72/* 73 * called under devcgroup_mutex 74 */ 75static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 76{ 77 struct dev_whitelist_item *wh, *tmp, *new; 78 79 list_for_each_entry(wh, orig, list) { 80 new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 81 if (!new) 82 goto free_and_exit; 83 list_add_tail(&new->list, dest); 84 } 85 86 return 0; 87 88free_and_exit: 89 list_for_each_entry_safe(wh, tmp, dest, list) { 90 list_del(&wh->list); 91 kfree(wh); 92 } 93 return -ENOMEM; 94} 95 96/* Stupid prototype - don't bother combining existing entries */ 97/* 98 * called under devcgroup_mutex 99 */ 100static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 101 struct dev_whitelist_item *wh) 102{ 103 struct dev_whitelist_item *whcopy, *walk; 104 105 whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 106 if (!whcopy) 107 return -ENOMEM; 108 109 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 110 if (walk->type != wh->type) 111 continue; 112 if (walk->major != wh->major) 113 continue; 114 if (walk->minor != wh->minor) 115 continue; 116 117 walk->access |= wh->access; 118 kfree(whcopy); 119 whcopy = NULL; 120 } 121 122 if (whcopy != NULL) 123 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 124 return 0; 125} 126 127static void whitelist_item_free(struct rcu_head *rcu) 128{ 129 struct dev_whitelist_item *item; 130 131 item = container_of(rcu, struct dev_whitelist_item, rcu); 132 kfree(item); 133} 134 135/* 136 * called under devcgroup_mutex 137 */ 138static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 139 struct dev_whitelist_item *wh) 140{ 141 struct dev_whitelist_item *walk, *tmp; 142 143 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 144 if (walk->type == DEV_ALL) 145 goto remove; 146 if (walk->type != wh->type) 147 continue; 148 if (walk->major != ~0 && walk->major != wh->major) 149 continue; 150 if (walk->minor != ~0 && walk->minor != wh->minor) 151 continue; 152 153remove: 154 walk->access &= ~wh->access; 155 if (!walk->access) { 156 list_del_rcu(&walk->list); 157 call_rcu(&walk->rcu, whitelist_item_free); 158 } 159 } 160} 161 162/* 163 * called from kernel/cgroup.c with cgroup_lock() held. 164 */ 165static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 166 struct cgroup *cgroup) 167{ 168 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 169 struct cgroup *parent_cgroup; 170 int ret; 171 172 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 173 if (!dev_cgroup) 174 return ERR_PTR(-ENOMEM); 175 INIT_LIST_HEAD(&dev_cgroup->whitelist); 176 parent_cgroup = cgroup->parent; 177 178 if (parent_cgroup == NULL) { 179 struct dev_whitelist_item *wh; 180 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 181 if (!wh) { 182 kfree(dev_cgroup); 183 return ERR_PTR(-ENOMEM); 184 } 185 wh->minor = wh->major = ~0; 186 wh->type = DEV_ALL; 187 wh->access = ACC_MASK; 188 list_add(&wh->list, &dev_cgroup->whitelist); 189 } else { 190 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 191 mutex_lock(&devcgroup_mutex); 192 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 193 &parent_dev_cgroup->whitelist); 194 mutex_unlock(&devcgroup_mutex); 195 if (ret) { 196 kfree(dev_cgroup); 197 return ERR_PTR(ret); 198 } 199 } 200 201 return &dev_cgroup->css; 202} 203 204static void devcgroup_destroy(struct cgroup_subsys *ss, 205 struct cgroup *cgroup) 206{ 207 struct dev_cgroup *dev_cgroup; 208 struct dev_whitelist_item *wh, *tmp; 209 210 dev_cgroup = cgroup_to_devcgroup(cgroup); 211 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 212 list_del(&wh->list); 213 kfree(wh); 214 } 215 kfree(dev_cgroup); 216} 217 218#define DEVCG_ALLOW 1 219#define DEVCG_DENY 2 220#define DEVCG_LIST 3 221 222#define MAJMINLEN 13 223#define ACCLEN 4 224 225static void set_access(char *acc, short access) 226{ 227 int idx = 0; 228 memset(acc, 0, ACCLEN); 229 if (access & ACC_READ) 230 acc[idx++] = 'r'; 231 if (access & ACC_WRITE) 232 acc[idx++] = 'w'; 233 if (access & ACC_MKNOD) 234 acc[idx++] = 'm'; 235} 236 237static char type_to_char(short type) 238{ 239 if (type == DEV_ALL) 240 return 'a'; 241 if (type == DEV_CHAR) 242 return 'c'; 243 if (type == DEV_BLOCK) 244 return 'b'; 245 return 'X'; 246} 247 248static void set_majmin(char *str, unsigned m) 249{ 250 if (m == ~0) 251 strcpy(str, "*"); 252 else 253 sprintf(str, "%u", m); 254} 255 256static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 257 struct seq_file *m) 258{ 259 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 260 struct dev_whitelist_item *wh; 261 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 262 263 rcu_read_lock(); 264 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 265 set_access(acc, wh->access); 266 set_majmin(maj, wh->major); 267 set_majmin(min, wh->minor); 268 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 269 maj, min, acc); 270 } 271 rcu_read_unlock(); 272 273 return 0; 274} 275 276/* 277 * may_access_whitelist: 278 * does the access granted to dev_cgroup c contain the access 279 * requested in whitelist item refwh. 280 * return 1 if yes, 0 if no. 281 * call with devcgroup_mutex held 282 */ 283static int may_access_whitelist(struct dev_cgroup *c, 284 struct dev_whitelist_item *refwh) 285{ 286 struct dev_whitelist_item *whitem; 287 288 list_for_each_entry(whitem, &c->whitelist, list) { 289 if (whitem->type & DEV_ALL) 290 return 1; 291 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 292 continue; 293 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 294 continue; 295 if (whitem->major != ~0 && whitem->major != refwh->major) 296 continue; 297 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 298 continue; 299 if (refwh->access & (~whitem->access)) 300 continue; 301 return 1; 302 } 303 return 0; 304} 305 306/* 307 * parent_has_perm: 308 * when adding a new allow rule to a device whitelist, the rule 309 * must be allowed in the parent device 310 */ 311static int parent_has_perm(struct dev_cgroup *childcg, 312 struct dev_whitelist_item *wh) 313{ 314 struct cgroup *pcg = childcg->css.cgroup->parent; 315 struct dev_cgroup *parent; 316 317 if (!pcg) 318 return 1; 319 parent = cgroup_to_devcgroup(pcg); 320 return may_access_whitelist(parent, wh); 321} 322 323/* 324 * Modify the whitelist using allow/deny rules. 325 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 326 * so we can give a container CAP_MKNOD to let it create devices but not 327 * modify the whitelist. 328 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 329 * us to also grant CAP_SYS_ADMIN to containers without giving away the 330 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 331 * 332 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 333 * new access is only allowed if you're in the top-level cgroup, or your 334 * parent cgroup has the access you're asking for. 335 */ 336static int devcgroup_update_access(struct dev_cgroup *devcgroup, 337 int filetype, const char *buffer) 338{ 339 const char *b; 340 char *endp; 341 int count; 342 struct dev_whitelist_item wh; 343 344 if (!capable(CAP_SYS_ADMIN)) 345 return -EPERM; 346 347 memset(&wh, 0, sizeof(wh)); 348 b = buffer; 349 350 switch (*b) { 351 case 'a': 352 wh.type = DEV_ALL; 353 wh.access = ACC_MASK; 354 wh.major = ~0; 355 wh.minor = ~0; 356 goto handle; 357 case 'b': 358 wh.type = DEV_BLOCK; 359 break; 360 case 'c': 361 wh.type = DEV_CHAR; 362 break; 363 default: 364 return -EINVAL; 365 } 366 b++; 367 if (!isspace(*b)) 368 return -EINVAL; 369 b++; 370 if (*b == '*') { 371 wh.major = ~0; 372 b++; 373 } else if (isdigit(*b)) { 374 wh.major = simple_strtoul(b, &endp, 10); 375 b = endp; 376 } else { 377 return -EINVAL; 378 } 379 if (*b != ':') 380 return -EINVAL; 381 b++; 382 383 /* read minor */ 384 if (*b == '*') { 385 wh.minor = ~0; 386 b++; 387 } else if (isdigit(*b)) { 388 wh.minor = simple_strtoul(b, &endp, 10); 389 b = endp; 390 } else { 391 return -EINVAL; 392 } 393 if (!isspace(*b)) 394 return -EINVAL; 395 for (b++, count = 0; count < 3; count++, b++) { 396 switch (*b) { 397 case 'r': 398 wh.access |= ACC_READ; 399 break; 400 case 'w': 401 wh.access |= ACC_WRITE; 402 break; 403 case 'm': 404 wh.access |= ACC_MKNOD; 405 break; 406 case '\n': 407 case '\0': 408 count = 3; 409 break; 410 default: 411 return -EINVAL; 412 } 413 } 414 415handle: 416 switch (filetype) { 417 case DEVCG_ALLOW: 418 if (!parent_has_perm(devcgroup, &wh)) 419 return -EPERM; 420 return dev_whitelist_add(devcgroup, &wh); 421 case DEVCG_DENY: 422 dev_whitelist_rm(devcgroup, &wh); 423 break; 424 default: 425 return -EINVAL; 426 } 427 return 0; 428} 429 430static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 431 const char *buffer) 432{ 433 int retval; 434 435 mutex_lock(&devcgroup_mutex); 436 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 437 cft->private, buffer); 438 mutex_unlock(&devcgroup_mutex); 439 return retval; 440} 441 442static struct cftype dev_cgroup_files[] = { 443 { 444 .name = "allow", 445 .write_string = devcgroup_access_write, 446 .private = DEVCG_ALLOW, 447 }, 448 { 449 .name = "deny", 450 .write_string = devcgroup_access_write, 451 .private = DEVCG_DENY, 452 }, 453 { 454 .name = "list", 455 .read_seq_string = devcgroup_seq_read, 456 .private = DEVCG_LIST, 457 }, 458}; 459 460static int devcgroup_populate(struct cgroup_subsys *ss, 461 struct cgroup *cgroup) 462{ 463 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 464 ARRAY_SIZE(dev_cgroup_files)); 465} 466 467struct cgroup_subsys devices_subsys = { 468 .name = "devices", 469 .can_attach = devcgroup_can_attach, 470 .create = devcgroup_create, 471 .destroy = devcgroup_destroy, 472 .populate = devcgroup_populate, 473 .subsys_id = devices_subsys_id, 474}; 475 476int devcgroup_inode_permission(struct inode *inode, int mask) 477{ 478 struct dev_cgroup *dev_cgroup; 479 struct dev_whitelist_item *wh; 480 481 dev_t device = inode->i_rdev; 482 if (!device) 483 return 0; 484 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 485 return 0; 486 487 rcu_read_lock(); 488 489 dev_cgroup = task_devcgroup(current); 490 491 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 492 if (wh->type & DEV_ALL) 493 goto acc_check; 494 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 495 continue; 496 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 497 continue; 498 if (wh->major != ~0 && wh->major != imajor(inode)) 499 continue; 500 if (wh->minor != ~0 && wh->minor != iminor(inode)) 501 continue; 502acc_check: 503 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 504 continue; 505 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 506 continue; 507 rcu_read_unlock(); 508 return 0; 509 } 510 511 rcu_read_unlock(); 512 513 return -EPERM; 514} 515 516int devcgroup_inode_mknod(int mode, dev_t dev) 517{ 518 struct dev_cgroup *dev_cgroup; 519 struct dev_whitelist_item *wh; 520 521 if (!S_ISBLK(mode) && !S_ISCHR(mode)) 522 return 0; 523 524 rcu_read_lock(); 525 526 dev_cgroup = task_devcgroup(current); 527 528 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 529 if (wh->type & DEV_ALL) 530 goto acc_check; 531 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 532 continue; 533 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 534 continue; 535 if (wh->major != ~0 && wh->major != MAJOR(dev)) 536 continue; 537 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 538 continue; 539acc_check: 540 if (!(wh->access & ACC_MKNOD)) 541 continue; 542 rcu_read_unlock(); 543 return 0; 544 } 545 546 rcu_read_unlock(); 547 548 return -EPERM; 549}