1/* 2 * device_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7#include <linux/device_cgroup.h> 8#include <linux/cgroup.h> 9#include <linux/ctype.h> 10#include <linux/list.h> 11#include <linux/uaccess.h> 12#include <linux/seq_file.h> 13#include <linux/rcupdate.h> 14 15#define ACC_MKNOD 1 16#define ACC_READ 2 17#define ACC_WRITE 4 18#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 19 20#define DEV_BLOCK 1 21#define DEV_CHAR 2 22#define DEV_ALL 4 /* this represents all devices */ 23 24/* 25 * whitelist locking rules: 26 * hold cgroup_lock() for update/read. 27 * hold rcu_read_lock() for read. 28 */ 29 30struct dev_whitelist_item { 31 u32 major, minor; 32 short type; 33 short access; 34 struct list_head list; 35 struct rcu_head rcu; 36}; 37 38struct dev_cgroup { 39 struct cgroup_subsys_state css; 40 struct list_head whitelist; 41}; 42 43static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 44{ 45 return container_of(s, struct dev_cgroup, css); 46} 47 48static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 49{ 50 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 51} 52 53static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 54{ 55 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 56} 57 58struct cgroup_subsys devices_subsys; 59 60static int devcgroup_can_attach(struct cgroup_subsys *ss, 61 struct cgroup *new_cgroup, struct task_struct *task) 62{ 63 if (current != task && !capable(CAP_SYS_ADMIN)) 64 return -EPERM; 65 66 return 0; 67} 68 69/* 70 * called under cgroup_lock() 71 */ 72static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 73{ 74 struct dev_whitelist_item *wh, *tmp, *new; 75 76 list_for_each_entry(wh, orig, list) { 77 new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 78 if (!new) 79 goto free_and_exit; 80 list_add_tail(&new->list, dest); 81 } 82 83 return 0; 84 85free_and_exit: 86 list_for_each_entry_safe(wh, tmp, dest, list) { 87 list_del(&wh->list); 88 kfree(wh); 89 } 90 return -ENOMEM; 91} 92 93/* Stupid prototype - don't bother combining existing entries */ 94/* 95 * called under cgroup_lock() 96 */ 97static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 98 struct dev_whitelist_item *wh) 99{ 100 struct dev_whitelist_item *whcopy, *walk; 101 102 whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 103 if (!whcopy) 104 return -ENOMEM; 105 106 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 107 if (walk->type != wh->type) 108 continue; 109 if (walk->major != wh->major) 110 continue; 111 if (walk->minor != wh->minor) 112 continue; 113 114 walk->access |= wh->access; 115 kfree(whcopy); 116 whcopy = NULL; 117 } 118 119 if (whcopy != NULL) 120 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 121 return 0; 122} 123 124static void whitelist_item_free(struct rcu_head *rcu) 125{ 126 struct dev_whitelist_item *item; 127 128 item = container_of(rcu, struct dev_whitelist_item, rcu); 129 kfree(item); 130} 131 132/* 133 * called under cgroup_lock() 134 */ 135static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 136 struct dev_whitelist_item *wh) 137{ 138 struct dev_whitelist_item *walk, *tmp; 139 140 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 141 if (walk->type == DEV_ALL) 142 goto remove; 143 if (walk->type != wh->type) 144 continue; 145 if (walk->major != ~0 && walk->major != wh->major) 146 continue; 147 if (walk->minor != ~0 && walk->minor != wh->minor) 148 continue; 149 150remove: 151 walk->access &= ~wh->access; 152 if (!walk->access) { 153 list_del_rcu(&walk->list); 154 call_rcu(&walk->rcu, whitelist_item_free); 155 } 156 } 157} 158 159/* 160 * called from kernel/cgroup.c with cgroup_lock() held. 161 */ 162static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 163 struct cgroup *cgroup) 164{ 165 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 166 struct cgroup *parent_cgroup; 167 int ret; 168 169 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 170 if (!dev_cgroup) 171 return ERR_PTR(-ENOMEM); 172 INIT_LIST_HEAD(&dev_cgroup->whitelist); 173 parent_cgroup = cgroup->parent; 174 175 if (parent_cgroup == NULL) { 176 struct dev_whitelist_item *wh; 177 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 178 if (!wh) { 179 kfree(dev_cgroup); 180 return ERR_PTR(-ENOMEM); 181 } 182 wh->minor = wh->major = ~0; 183 wh->type = DEV_ALL; 184 wh->access = ACC_MASK; 185 list_add(&wh->list, &dev_cgroup->whitelist); 186 } else { 187 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 188 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 189 &parent_dev_cgroup->whitelist); 190 if (ret) { 191 kfree(dev_cgroup); 192 return ERR_PTR(ret); 193 } 194 } 195 196 return &dev_cgroup->css; 197} 198 199static void devcgroup_destroy(struct cgroup_subsys *ss, 200 struct cgroup *cgroup) 201{ 202 struct dev_cgroup *dev_cgroup; 203 struct dev_whitelist_item *wh, *tmp; 204 205 dev_cgroup = cgroup_to_devcgroup(cgroup); 206 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 207 list_del(&wh->list); 208 kfree(wh); 209 } 210 kfree(dev_cgroup); 211} 212 213#define DEVCG_ALLOW 1 214#define DEVCG_DENY 2 215#define DEVCG_LIST 3 216 217#define MAJMINLEN 13 218#define ACCLEN 4 219 220static void set_access(char *acc, short access) 221{ 222 int idx = 0; 223 memset(acc, 0, ACCLEN); 224 if (access & ACC_READ) 225 acc[idx++] = 'r'; 226 if (access & ACC_WRITE) 227 acc[idx++] = 'w'; 228 if (access & ACC_MKNOD) 229 acc[idx++] = 'm'; 230} 231 232static char type_to_char(short type) 233{ 234 if (type == DEV_ALL) 235 return 'a'; 236 if (type == DEV_CHAR) 237 return 'c'; 238 if (type == DEV_BLOCK) 239 return 'b'; 240 return 'X'; 241} 242 243static void set_majmin(char *str, unsigned m) 244{ 245 if (m == ~0) 246 strcpy(str, "*"); 247 else 248 sprintf(str, "%u", m); 249} 250 251static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 252 struct seq_file *m) 253{ 254 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 255 struct dev_whitelist_item *wh; 256 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 257 258 rcu_read_lock(); 259 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 260 set_access(acc, wh->access); 261 set_majmin(maj, wh->major); 262 set_majmin(min, wh->minor); 263 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 264 maj, min, acc); 265 } 266 rcu_read_unlock(); 267 268 return 0; 269} 270 271/* 272 * may_access_whitelist: 273 * does the access granted to dev_cgroup c contain the access 274 * requested in whitelist item refwh. 275 * return 1 if yes, 0 if no. 276 * call with c->lock held 277 */ 278static int may_access_whitelist(struct dev_cgroup *c, 279 struct dev_whitelist_item *refwh) 280{ 281 struct dev_whitelist_item *whitem; 282 283 list_for_each_entry(whitem, &c->whitelist, list) { 284 if (whitem->type & DEV_ALL) 285 return 1; 286 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 287 continue; 288 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 289 continue; 290 if (whitem->major != ~0 && whitem->major != refwh->major) 291 continue; 292 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 293 continue; 294 if (refwh->access & (~whitem->access)) 295 continue; 296 return 1; 297 } 298 return 0; 299} 300 301/* 302 * parent_has_perm: 303 * when adding a new allow rule to a device whitelist, the rule 304 * must be allowed in the parent device 305 */ 306static int parent_has_perm(struct dev_cgroup *childcg, 307 struct dev_whitelist_item *wh) 308{ 309 struct cgroup *pcg = childcg->css.cgroup->parent; 310 struct dev_cgroup *parent; 311 312 if (!pcg) 313 return 1; 314 parent = cgroup_to_devcgroup(pcg); 315 return may_access_whitelist(parent, wh); 316} 317 318/* 319 * Modify the whitelist using allow/deny rules. 320 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 321 * so we can give a container CAP_MKNOD to let it create devices but not 322 * modify the whitelist. 323 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 324 * us to also grant CAP_SYS_ADMIN to containers without giving away the 325 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 326 * 327 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 328 * new access is only allowed if you're in the top-level cgroup, or your 329 * parent cgroup has the access you're asking for. 330 */ 331static int devcgroup_update_access(struct dev_cgroup *devcgroup, 332 int filetype, const char *buffer) 333{ 334 const char *b; 335 char *endp; 336 int count; 337 struct dev_whitelist_item wh; 338 339 if (!capable(CAP_SYS_ADMIN)) 340 return -EPERM; 341 342 memset(&wh, 0, sizeof(wh)); 343 b = buffer; 344 345 switch (*b) { 346 case 'a': 347 wh.type = DEV_ALL; 348 wh.access = ACC_MASK; 349 wh.major = ~0; 350 wh.minor = ~0; 351 goto handle; 352 case 'b': 353 wh.type = DEV_BLOCK; 354 break; 355 case 'c': 356 wh.type = DEV_CHAR; 357 break; 358 default: 359 return -EINVAL; 360 } 361 b++; 362 if (!isspace(*b)) 363 return -EINVAL; 364 b++; 365 if (*b == '*') { 366 wh.major = ~0; 367 b++; 368 } else if (isdigit(*b)) { 369 wh.major = simple_strtoul(b, &endp, 10); 370 b = endp; 371 } else { 372 return -EINVAL; 373 } 374 if (*b != ':') 375 return -EINVAL; 376 b++; 377 378 /* read minor */ 379 if (*b == '*') { 380 wh.minor = ~0; 381 b++; 382 } else if (isdigit(*b)) { 383 wh.minor = simple_strtoul(b, &endp, 10); 384 b = endp; 385 } else { 386 return -EINVAL; 387 } 388 if (!isspace(*b)) 389 return -EINVAL; 390 for (b++, count = 0; count < 3; count++, b++) { 391 switch (*b) { 392 case 'r': 393 wh.access |= ACC_READ; 394 break; 395 case 'w': 396 wh.access |= ACC_WRITE; 397 break; 398 case 'm': 399 wh.access |= ACC_MKNOD; 400 break; 401 case '\n': 402 case '\0': 403 count = 3; 404 break; 405 default: 406 return -EINVAL; 407 } 408 } 409 410handle: 411 switch (filetype) { 412 case DEVCG_ALLOW: 413 if (!parent_has_perm(devcgroup, &wh)) 414 return -EPERM; 415 return dev_whitelist_add(devcgroup, &wh); 416 case DEVCG_DENY: 417 dev_whitelist_rm(devcgroup, &wh); 418 break; 419 default: 420 return -EINVAL; 421 } 422 return 0; 423} 424 425static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 426 const char *buffer) 427{ 428 int retval; 429 if (!cgroup_lock_live_group(cgrp)) 430 return -ENODEV; 431 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 432 cft->private, buffer); 433 cgroup_unlock(); 434 return retval; 435} 436 437static struct cftype dev_cgroup_files[] = { 438 { 439 .name = "allow", 440 .write_string = devcgroup_access_write, 441 .private = DEVCG_ALLOW, 442 }, 443 { 444 .name = "deny", 445 .write_string = devcgroup_access_write, 446 .private = DEVCG_DENY, 447 }, 448 { 449 .name = "list", 450 .read_seq_string = devcgroup_seq_read, 451 .private = DEVCG_LIST, 452 }, 453}; 454 455static int devcgroup_populate(struct cgroup_subsys *ss, 456 struct cgroup *cgroup) 457{ 458 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 459 ARRAY_SIZE(dev_cgroup_files)); 460} 461 462struct cgroup_subsys devices_subsys = { 463 .name = "devices", 464 .can_attach = devcgroup_can_attach, 465 .create = devcgroup_create, 466 .destroy = devcgroup_destroy, 467 .populate = devcgroup_populate, 468 .subsys_id = devices_subsys_id, 469}; 470 471int devcgroup_inode_permission(struct inode *inode, int mask) 472{ 473 struct dev_cgroup *dev_cgroup; 474 struct dev_whitelist_item *wh; 475 476 dev_t device = inode->i_rdev; 477 if (!device) 478 return 0; 479 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 480 return 0; 481 482 rcu_read_lock(); 483 484 dev_cgroup = task_devcgroup(current); 485 486 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 487 if (wh->type & DEV_ALL) 488 goto acc_check; 489 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 490 continue; 491 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 492 continue; 493 if (wh->major != ~0 && wh->major != imajor(inode)) 494 continue; 495 if (wh->minor != ~0 && wh->minor != iminor(inode)) 496 continue; 497acc_check: 498 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 499 continue; 500 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 501 continue; 502 rcu_read_unlock(); 503 return 0; 504 } 505 506 rcu_read_unlock(); 507 508 return -EPERM; 509} 510 511int devcgroup_inode_mknod(int mode, dev_t dev) 512{ 513 struct dev_cgroup *dev_cgroup; 514 struct dev_whitelist_item *wh; 515 516 rcu_read_lock(); 517 518 dev_cgroup = task_devcgroup(current); 519 520 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 521 if (wh->type & DEV_ALL) 522 goto acc_check; 523 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 524 continue; 525 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 526 continue; 527 if (wh->major != ~0 && wh->major != MAJOR(dev)) 528 continue; 529 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 530 continue; 531acc_check: 532 if (!(wh->access & ACC_MKNOD)) 533 continue; 534 rcu_read_unlock(); 535 return 0; 536 } 537 538 rcu_read_unlock(); 539 540 return -EPERM; 541}