Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl

Pull sysctl updates from Eric Biederman:

- Rewrite of sysctl for speed and clarity.

Insert/remove/lookup in sysctl are now all O(log N) operations (the
entries of each directory are indexed with an rbtree), and are no longer
bottlenecks in the process of adding and removing network devices.

sysctl is now focused on being a filesystem instead of a system call,
and all of the code can be found in fs/proc/proc_sysctl.c. Hopefully
this means the code is now approachable.

Many thanks are owed to Lucian Adrian Grijincu for keeping at this until
something usable was found.

- The recent proc_sys_poll oops found by the fuzzer during hibernation
is fixed.

* git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/sysctl: (36 commits)
sysctl: protect poll() in entries that may go away
sysctl: Don't call sysctl_follow_link unless we are a link.
sysctl: Comments to make the code clearer.
sysctl: Correct error return from get_subdir
sysctl: An easier to read version of find_subdir
sysctl: fix memset parameters in setup_sysctl_set()
sysctl: remove an unused variable
sysctl: Add register_sysctl for normal sysctl users
sysctl: Index sysctl directories with rbtrees.
sysctl: Make the header lists per directory.
sysctl: Move sysctl_check_dups into insert_header
sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy
sysctl: Replace root_list with links between sysctl_table_sets.
sysctl: Add sysctl_print_dir and use it in get_subdir
sysctl: Stop requiring explicit management of sysctl directories
sysctl: Add a root pointer to ctl_table_set
sysctl: Rewrite proc_sys_readdir in terms of first_entry and next_entry
sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry.
sysctl: Normalize the root_table data structure.
sysctl: Factor out insert_header and erase_header
...

+1280 -797
+3
fs/proc/internal.h
··· 10 10 */ 11 11 12 12 #include <linux/proc_fs.h> 13 + struct ctl_table_header; 13 14 14 15 extern struct proc_dir_entry proc_root; 15 16 #ifdef CONFIG_PROC_SYSCTL 16 17 extern int proc_sys_init(void); 18 + extern void sysctl_head_put(struct ctl_table_header *head); 17 19 #else 18 20 static inline void proc_sys_init(void) { } 21 + static inline void sysctl_head_put(struct ctl_table_header *head) { } 19 22 #endif 20 23 #ifdef CONFIG_NET 21 24 extern int proc_net_init(void);
+1198 -76
fs/proc/proc_sysctl.c
··· 9 9 #include <linux/sched.h> 10 10 #include <linux/namei.h> 11 11 #include <linux/mm.h> 12 + #include <linux/module.h> 12 13 #include "internal.h" 13 14 14 15 static const struct dentry_operations proc_sys_dentry_operations; ··· 25 24 26 25 atomic_inc(&poll->event); 27 26 wake_up_interruptible(&poll->wait); 27 + } 28 + 29 + static struct ctl_table root_table[] = { 30 + { 31 + .procname = "", 32 + .mode = S_IFDIR|S_IRUGO|S_IXUGO, 33 + }, 34 + { } 35 + }; 36 + static struct ctl_table_root sysctl_table_root = { 37 + .default_set.dir.header = { 38 + {{.count = 1, 39 + .nreg = 1, 40 + .ctl_table = root_table }}, 41 + .ctl_table_arg = root_table, 42 + .root = &sysctl_table_root, 43 + .set = &sysctl_table_root.default_set, 44 + }, 45 + }; 46 + 47 + static DEFINE_SPINLOCK(sysctl_lock); 48 + 49 + static void drop_sysctl_table(struct ctl_table_header *header); 50 + static int sysctl_follow_link(struct ctl_table_header **phead, 51 + struct ctl_table **pentry, struct nsproxy *namespaces); 52 + static int insert_links(struct ctl_table_header *head); 53 + static void put_links(struct ctl_table_header *header); 54 + 55 + static void sysctl_print_dir(struct ctl_dir *dir) 56 + { 57 + if (dir->header.parent) 58 + sysctl_print_dir(dir->header.parent); 59 + printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); 60 + } 61 + 62 + static int namecmp(const char *name1, int len1, const char *name2, int len2) 63 + { 64 + int minlen; 65 + int cmp; 66 + 67 + minlen = len1; 68 + if (minlen > len2) 69 + minlen = len2; 70 + 71 + cmp = memcmp(name1, name2, minlen); 72 + if (cmp == 0) 73 + cmp = len1 - len2; 74 + return cmp; 75 + } 76 + 77 + /* Called under sysctl_lock */ 78 + static struct ctl_table *find_entry(struct ctl_table_header **phead, 79 + struct ctl_dir *dir, const char *name, int namelen) 80 + { 81 + struct ctl_table_header *head; 82 + struct ctl_table *entry; 83 + struct rb_node *node = dir->root.rb_node; 84 + 85 + while (node) 86 + { 87 + struct ctl_node *ctl_node; 88 + 
const char *procname; 89 + int cmp; 90 + 91 + ctl_node = rb_entry(node, struct ctl_node, node); 92 + head = ctl_node->header; 93 + entry = &head->ctl_table[ctl_node - head->node]; 94 + procname = entry->procname; 95 + 96 + cmp = namecmp(name, namelen, procname, strlen(procname)); 97 + if (cmp < 0) 98 + node = node->rb_left; 99 + else if (cmp > 0) 100 + node = node->rb_right; 101 + else { 102 + *phead = head; 103 + return entry; 104 + } 105 + } 106 + return NULL; 107 + } 108 + 109 + static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) 110 + { 111 + struct rb_node *node = &head->node[entry - head->ctl_table].node; 112 + struct rb_node **p = &head->parent->root.rb_node; 113 + struct rb_node *parent = NULL; 114 + const char *name = entry->procname; 115 + int namelen = strlen(name); 116 + 117 + while (*p) { 118 + struct ctl_table_header *parent_head; 119 + struct ctl_table *parent_entry; 120 + struct ctl_node *parent_node; 121 + const char *parent_name; 122 + int cmp; 123 + 124 + parent = *p; 125 + parent_node = rb_entry(parent, struct ctl_node, node); 126 + parent_head = parent_node->header; 127 + parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; 128 + parent_name = parent_entry->procname; 129 + 130 + cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); 131 + if (cmp < 0) 132 + p = &(*p)->rb_left; 133 + else if (cmp > 0) 134 + p = &(*p)->rb_right; 135 + else { 136 + printk(KERN_ERR "sysctl duplicate entry: "); 137 + sysctl_print_dir(head->parent); 138 + printk(KERN_CONT "/%s\n", entry->procname); 139 + return -EEXIST; 140 + } 141 + } 142 + 143 + rb_link_node(node, parent, p); 144 + return 0; 145 + } 146 + 147 + static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) 148 + { 149 + struct rb_node *node = &head->node[entry - head->ctl_table].node; 150 + 151 + rb_erase(node, &head->parent->root); 152 + } 153 + 154 + static void init_header(struct ctl_table_header *head, 155 + struct 
ctl_table_root *root, struct ctl_table_set *set, 156 + struct ctl_node *node, struct ctl_table *table) 157 + { 158 + head->ctl_table = table; 159 + head->ctl_table_arg = table; 160 + head->used = 0; 161 + head->count = 1; 162 + head->nreg = 1; 163 + head->unregistering = NULL; 164 + head->root = root; 165 + head->set = set; 166 + head->parent = NULL; 167 + head->node = node; 168 + if (node) { 169 + struct ctl_table *entry; 170 + for (entry = table; entry->procname; entry++, node++) { 171 + rb_init_node(&node->node); 172 + node->header = head; 173 + } 174 + } 175 + } 176 + 177 + static void erase_header(struct ctl_table_header *head) 178 + { 179 + struct ctl_table *entry; 180 + for (entry = head->ctl_table; entry->procname; entry++) 181 + erase_entry(head, entry); 182 + } 183 + 184 + static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) 185 + { 186 + struct ctl_table *entry; 187 + int err; 188 + 189 + dir->header.nreg++; 190 + header->parent = dir; 191 + err = insert_links(header); 192 + if (err) 193 + goto fail_links; 194 + for (entry = header->ctl_table; entry->procname; entry++) { 195 + err = insert_entry(header, entry); 196 + if (err) 197 + goto fail; 198 + } 199 + return 0; 200 + fail: 201 + erase_header(header); 202 + put_links(header); 203 + fail_links: 204 + header->parent = NULL; 205 + drop_sysctl_table(&dir->header); 206 + return err; 207 + } 208 + 209 + /* called under sysctl_lock */ 210 + static int use_table(struct ctl_table_header *p) 211 + { 212 + if (unlikely(p->unregistering)) 213 + return 0; 214 + p->used++; 215 + return 1; 216 + } 217 + 218 + /* called under sysctl_lock */ 219 + static void unuse_table(struct ctl_table_header *p) 220 + { 221 + if (!--p->used) 222 + if (unlikely(p->unregistering)) 223 + complete(p->unregistering); 224 + } 225 + 226 + /* called under sysctl_lock, will reacquire if has to wait */ 227 + static void start_unregistering(struct ctl_table_header *p) 228 + { 229 + /* 230 + * if p->used is 0, nobody 
will ever touch that entry again; 231 + * we'll eliminate all paths to it before dropping sysctl_lock 232 + */ 233 + if (unlikely(p->used)) { 234 + struct completion wait; 235 + init_completion(&wait); 236 + p->unregistering = &wait; 237 + spin_unlock(&sysctl_lock); 238 + wait_for_completion(&wait); 239 + spin_lock(&sysctl_lock); 240 + } else { 241 + /* anything non-NULL; we'll never dereference it */ 242 + p->unregistering = ERR_PTR(-EINVAL); 243 + } 244 + /* 245 + * do not remove from the list until nobody holds it; walking the 246 + * list in do_sysctl() relies on that. 247 + */ 248 + erase_header(p); 249 + } 250 + 251 + static void sysctl_head_get(struct ctl_table_header *head) 252 + { 253 + spin_lock(&sysctl_lock); 254 + head->count++; 255 + spin_unlock(&sysctl_lock); 256 + } 257 + 258 + void sysctl_head_put(struct ctl_table_header *head) 259 + { 260 + spin_lock(&sysctl_lock); 261 + if (!--head->count) 262 + kfree_rcu(head, rcu); 263 + spin_unlock(&sysctl_lock); 264 + } 265 + 266 + static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 267 + { 268 + if (!head) 269 + BUG(); 270 + spin_lock(&sysctl_lock); 271 + if (!use_table(head)) 272 + head = ERR_PTR(-ENOENT); 273 + spin_unlock(&sysctl_lock); 274 + return head; 275 + } 276 + 277 + static void sysctl_head_finish(struct ctl_table_header *head) 278 + { 279 + if (!head) 280 + return; 281 + spin_lock(&sysctl_lock); 282 + unuse_table(head); 283 + spin_unlock(&sysctl_lock); 284 + } 285 + 286 + static struct ctl_table_set * 287 + lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 288 + { 289 + struct ctl_table_set *set = &root->default_set; 290 + if (root->lookup) 291 + set = root->lookup(root, namespaces); 292 + return set; 293 + } 294 + 295 + static struct ctl_table *lookup_entry(struct ctl_table_header **phead, 296 + struct ctl_dir *dir, 297 + const char *name, int namelen) 298 + { 299 + struct ctl_table_header *head; 300 + struct ctl_table *entry; 301 + 302 + 
spin_lock(&sysctl_lock); 303 + entry = find_entry(&head, dir, name, namelen); 304 + if (entry && use_table(head)) 305 + *phead = head; 306 + else 307 + entry = NULL; 308 + spin_unlock(&sysctl_lock); 309 + return entry; 310 + } 311 + 312 + static struct ctl_node *first_usable_entry(struct rb_node *node) 313 + { 314 + struct ctl_node *ctl_node; 315 + 316 + for (;node; node = rb_next(node)) { 317 + ctl_node = rb_entry(node, struct ctl_node, node); 318 + if (use_table(ctl_node->header)) 319 + return ctl_node; 320 + } 321 + return NULL; 322 + } 323 + 324 + static void first_entry(struct ctl_dir *dir, 325 + struct ctl_table_header **phead, struct ctl_table **pentry) 326 + { 327 + struct ctl_table_header *head = NULL; 328 + struct ctl_table *entry = NULL; 329 + struct ctl_node *ctl_node; 330 + 331 + spin_lock(&sysctl_lock); 332 + ctl_node = first_usable_entry(rb_first(&dir->root)); 333 + spin_unlock(&sysctl_lock); 334 + if (ctl_node) { 335 + head = ctl_node->header; 336 + entry = &head->ctl_table[ctl_node - head->node]; 337 + } 338 + *phead = head; 339 + *pentry = entry; 340 + } 341 + 342 + static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) 343 + { 344 + struct ctl_table_header *head = *phead; 345 + struct ctl_table *entry = *pentry; 346 + struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; 347 + 348 + spin_lock(&sysctl_lock); 349 + unuse_table(head); 350 + 351 + ctl_node = first_usable_entry(rb_next(&ctl_node->node)); 352 + spin_unlock(&sysctl_lock); 353 + head = NULL; 354 + if (ctl_node) { 355 + head = ctl_node->header; 356 + entry = &head->ctl_table[ctl_node - head->node]; 357 + } 358 + *phead = head; 359 + *pentry = entry; 360 + } 361 + 362 + void register_sysctl_root(struct ctl_table_root *root) 363 + { 364 + } 365 + 366 + /* 367 + * sysctl_perm does NOT grant the superuser all rights automatically, because 368 + * some sysctl variables are readonly even to root. 
369 + */ 370 + 371 + static int test_perm(int mode, int op) 372 + { 373 + if (!current_euid()) 374 + mode >>= 6; 375 + else if (in_egroup_p(0)) 376 + mode >>= 3; 377 + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 378 + return 0; 379 + return -EACCES; 380 + } 381 + 382 + static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 383 + { 384 + int mode; 385 + 386 + if (root->permissions) 387 + mode = root->permissions(root, current->nsproxy, table); 388 + else 389 + mode = table->mode; 390 + 391 + return test_perm(mode, op); 28 392 } 29 393 30 394 static struct inode *proc_sys_make_inode(struct super_block *sb, ··· 411 45 412 46 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 413 47 inode->i_mode = table->mode; 414 - if (!table->child) { 48 + if (!S_ISDIR(table->mode)) { 415 49 inode->i_mode |= S_IFREG; 416 50 inode->i_op = &proc_sys_inode_operations; 417 51 inode->i_fop = &proc_sys_file_operations; 418 52 } else { 419 53 inode->i_mode |= S_IFDIR; 420 - clear_nlink(inode); 421 54 inode->i_op = &proc_sys_dir_operations; 422 55 inode->i_fop = &proc_sys_dir_file_operations; 423 56 } ··· 424 59 return inode; 425 60 } 426 61 427 - static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) 428 - { 429 - int len; 430 - for ( ; p->procname; p++) { 431 - 432 - if (!p->procname) 433 - continue; 434 - 435 - len = strlen(p->procname); 436 - if (len != name->len) 437 - continue; 438 - 439 - if (memcmp(p->procname, name->name, len) != 0) 440 - continue; 441 - 442 - /* I have a match */ 443 - return p; 444 - } 445 - return NULL; 446 - } 447 - 448 62 static struct ctl_table_header *grab_header(struct inode *inode) 449 63 { 450 - if (PROC_I(inode)->sysctl) 451 - return sysctl_head_grab(PROC_I(inode)->sysctl); 452 - else 453 - return sysctl_head_next(NULL); 64 + struct ctl_table_header *head = PROC_I(inode)->sysctl; 65 + if (!head) 66 + head = &sysctl_table_root.default_set.dir.header; 67 + return 
sysctl_head_grab(head); 454 68 } 455 69 456 70 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 457 71 struct nameidata *nd) 458 72 { 459 73 struct ctl_table_header *head = grab_header(dir); 460 - struct ctl_table *table = PROC_I(dir)->sysctl_entry; 461 74 struct ctl_table_header *h = NULL; 462 75 struct qstr *name = &dentry->d_name; 463 76 struct ctl_table *p; 464 77 struct inode *inode; 465 78 struct dentry *err = ERR_PTR(-ENOENT); 79 + struct ctl_dir *ctl_dir; 80 + int ret; 466 81 467 82 if (IS_ERR(head)) 468 83 return ERR_CAST(head); 469 84 470 - if (table && !table->child) { 471 - WARN_ON(1); 472 - goto out; 473 - } 85 + ctl_dir = container_of(head, struct ctl_dir, header); 474 86 475 - table = table ? table->child : head->ctl_table; 476 - 477 - p = find_in_table(table, name); 478 - if (!p) { 479 - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 480 - if (h->attached_to != table) 481 - continue; 482 - p = find_in_table(h->attached_by, name); 483 - if (p) 484 - break; 485 - } 486 - } 487 - 87 + p = lookup_entry(&h, ctl_dir, name->name, name->len); 488 88 if (!p) 489 89 goto out; 90 + 91 + if (S_ISLNK(p->mode)) { 92 + ret = sysctl_follow_link(&h, &p, current->nsproxy); 93 + err = ERR_PTR(ret); 94 + if (ret) 95 + goto out; 96 + } 490 97 491 98 err = ERR_PTR(-ENOMEM); 492 99 inode = proc_sys_make_inode(dir->i_sb, h ? 
h : head, p); ··· 527 190 528 191 static int proc_sys_open(struct inode *inode, struct file *filp) 529 192 { 193 + struct ctl_table_header *head = grab_header(inode); 530 194 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 195 + 196 + /* sysctl was unregistered */ 197 + if (IS_ERR(head)) 198 + return PTR_ERR(head); 531 199 532 200 if (table->poll) 533 201 filp->private_data = proc_sys_poll_event(table->poll); 202 + 203 + sysctl_head_finish(head); 534 204 535 205 return 0; 536 206 } ··· 545 201 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 546 202 { 547 203 struct inode *inode = filp->f_path.dentry->d_inode; 204 + struct ctl_table_header *head = grab_header(inode); 548 205 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 549 - unsigned long event = (unsigned long)filp->private_data; 550 206 unsigned int ret = DEFAULT_POLLMASK; 207 + unsigned long event; 208 + 209 + /* sysctl was unregistered */ 210 + if (IS_ERR(head)) 211 + return POLLERR | POLLHUP; 551 212 552 213 if (!table->proc_handler) 553 214 goto out; ··· 560 211 if (!table->poll) 561 212 goto out; 562 213 214 + event = (unsigned long)filp->private_data; 563 215 poll_wait(filp, &table->poll->wait, wait); 564 216 565 217 if (event != atomic_read(&table->poll->event)) { ··· 569 219 } 570 220 571 221 out: 222 + sysctl_head_finish(head); 223 + 572 224 return ret; 573 225 } 574 226 ··· 612 260 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 613 261 } 614 262 263 + static int proc_sys_link_fill_cache(struct file *filp, void *dirent, 264 + filldir_t filldir, 265 + struct ctl_table_header *head, 266 + struct ctl_table *table) 267 + { 268 + int err, ret = 0; 269 + head = sysctl_head_grab(head); 270 + 271 + if (S_ISLNK(table->mode)) { 272 + /* It is not an error if we can not follow the link ignore it */ 273 + err = sysctl_follow_link(&head, &table, current->nsproxy); 274 + if (err) 275 + goto out; 276 + } 277 + 278 + ret = proc_sys_fill_cache(filp, dirent, 
filldir, head, table); 279 + out: 280 + sysctl_head_finish(head); 281 + return ret; 282 + } 283 + 615 284 static int scan(struct ctl_table_header *head, ctl_table *table, 616 285 unsigned long *pos, struct file *file, 617 286 void *dirent, filldir_t filldir) 618 287 { 288 + int res; 619 289 620 - for (; table->procname; table++, (*pos)++) { 621 - int res; 290 + if ((*pos)++ < file->f_pos) 291 + return 0; 622 292 623 - /* Can't do anything without a proc name */ 624 - if (!table->procname) 625 - continue; 626 - 627 - if (*pos < file->f_pos) 628 - continue; 629 - 293 + if (unlikely(S_ISLNK(table->mode))) 294 + res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); 295 + else 630 296 res = proc_sys_fill_cache(file, dirent, filldir, head, table); 631 - if (res) 632 - return res; 633 297 634 - file->f_pos = *pos + 1; 635 - } 636 - return 0; 298 + if (res == 0) 299 + file->f_pos = *pos; 300 + 301 + return res; 637 302 } 638 303 639 304 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) ··· 658 289 struct dentry *dentry = filp->f_path.dentry; 659 290 struct inode *inode = dentry->d_inode; 660 291 struct ctl_table_header *head = grab_header(inode); 661 - struct ctl_table *table = PROC_I(inode)->sysctl_entry; 662 292 struct ctl_table_header *h = NULL; 293 + struct ctl_table *entry; 294 + struct ctl_dir *ctl_dir; 663 295 unsigned long pos; 664 296 int ret = -EINVAL; 665 297 666 298 if (IS_ERR(head)) 667 299 return PTR_ERR(head); 668 300 669 - if (table && !table->child) { 670 - WARN_ON(1); 671 - goto out; 672 - } 673 - 674 - table = table ? 
table->child : head->ctl_table; 301 + ctl_dir = container_of(head, struct ctl_dir, header); 675 302 676 303 ret = 0; 677 304 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ ··· 685 320 } 686 321 pos = 2; 687 322 688 - ret = scan(head, table, &pos, filp, dirent, filldir); 689 - if (ret) 690 - goto out; 691 - 692 - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { 693 - if (h->attached_to != table) 694 - continue; 695 - ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); 323 + for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { 324 + ret = scan(h, entry, &pos, filp, dirent, filldir); 696 325 if (ret) { 697 326 sysctl_head_finish(h); 698 327 break; ··· 806 447 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 807 448 } 808 449 450 + static int sysctl_is_seen(struct ctl_table_header *p) 451 + { 452 + struct ctl_table_set *set = p->set; 453 + int res; 454 + spin_lock(&sysctl_lock); 455 + if (p->unregistering) 456 + res = 0; 457 + else if (!set->is_seen) 458 + res = 1; 459 + else 460 + res = set->is_seen(set); 461 + spin_unlock(&sysctl_lock); 462 + return res; 463 + } 464 + 809 465 static int proc_sys_compare(const struct dentry *parent, 810 466 const struct inode *pinode, 811 467 const struct dentry *dentry, const struct inode *inode, ··· 846 472 .d_compare = proc_sys_compare, 847 473 }; 848 474 475 + static struct ctl_dir *find_subdir(struct ctl_dir *dir, 476 + const char *name, int namelen) 477 + { 478 + struct ctl_table_header *head; 479 + struct ctl_table *entry; 480 + 481 + entry = find_entry(&head, dir, name, namelen); 482 + if (!entry) 483 + return ERR_PTR(-ENOENT); 484 + if (!S_ISDIR(entry->mode)) 485 + return ERR_PTR(-ENOTDIR); 486 + return container_of(head, struct ctl_dir, header); 487 + } 488 + 489 + static struct ctl_dir *new_dir(struct ctl_table_set *set, 490 + const char *name, int namelen) 491 + { 492 + struct ctl_table *table; 493 + struct ctl_dir *new; 494 + struct ctl_node *node; 495 + char 
*new_name; 496 + 497 + new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + 498 + sizeof(struct ctl_table)*2 + namelen + 1, 499 + GFP_KERNEL); 500 + if (!new) 501 + return NULL; 502 + 503 + node = (struct ctl_node *)(new + 1); 504 + table = (struct ctl_table *)(node + 1); 505 + new_name = (char *)(table + 2); 506 + memcpy(new_name, name, namelen); 507 + new_name[namelen] = '\0'; 508 + table[0].procname = new_name; 509 + table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; 510 + init_header(&new->header, set->dir.header.root, set, node, table); 511 + 512 + return new; 513 + } 514 + 515 + /** 516 + * get_subdir - find or create a subdir with the specified name. 517 + * @dir: Directory to create the subdirectory in 518 + * @name: The name of the subdirectory to find or create 519 + * @namelen: The length of name 520 + * 521 + * Takes a directory with an elevated reference count so we know that 522 + * if we drop the lock the directory will not go away. Upon success 523 + * the reference is moved from @dir to the returned subdirectory. 524 + * Upon error an error code is returned and the reference on @dir is 525 + * simply dropped. 526 + */ 527 + static struct ctl_dir *get_subdir(struct ctl_dir *dir, 528 + const char *name, int namelen) 529 + { 530 + struct ctl_table_set *set = dir->header.set; 531 + struct ctl_dir *subdir, *new = NULL; 532 + int err; 533 + 534 + spin_lock(&sysctl_lock); 535 + subdir = find_subdir(dir, name, namelen); 536 + if (!IS_ERR(subdir)) 537 + goto found; 538 + if (PTR_ERR(subdir) != -ENOENT) 539 + goto failed; 540 + 541 + spin_unlock(&sysctl_lock); 542 + new = new_dir(set, name, namelen); 543 + spin_lock(&sysctl_lock); 544 + subdir = ERR_PTR(-ENOMEM); 545 + if (!new) 546 + goto failed; 547 + 548 + /* Was the subdir added while we dropped the lock? */ 549 + subdir = find_subdir(dir, name, namelen); 550 + if (!IS_ERR(subdir)) 551 + goto found; 552 + if (PTR_ERR(subdir) != -ENOENT) 553 + goto failed; 554 + 555 + /* Nope. 
Use the our freshly made directory entry. */ 556 + err = insert_header(dir, &new->header); 557 + subdir = ERR_PTR(err); 558 + if (err) 559 + goto failed; 560 + subdir = new; 561 + found: 562 + subdir->header.nreg++; 563 + failed: 564 + if (unlikely(IS_ERR(subdir))) { 565 + printk(KERN_ERR "sysctl could not get directory: "); 566 + sysctl_print_dir(dir); 567 + printk(KERN_CONT "/%*.*s %ld\n", 568 + namelen, namelen, name, PTR_ERR(subdir)); 569 + } 570 + drop_sysctl_table(&dir->header); 571 + if (new) 572 + drop_sysctl_table(&new->header); 573 + spin_unlock(&sysctl_lock); 574 + return subdir; 575 + } 576 + 577 + static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) 578 + { 579 + struct ctl_dir *parent; 580 + const char *procname; 581 + if (!dir->header.parent) 582 + return &set->dir; 583 + parent = xlate_dir(set, dir->header.parent); 584 + if (IS_ERR(parent)) 585 + return parent; 586 + procname = dir->header.ctl_table[0].procname; 587 + return find_subdir(parent, procname, strlen(procname)); 588 + } 589 + 590 + static int sysctl_follow_link(struct ctl_table_header **phead, 591 + struct ctl_table **pentry, struct nsproxy *namespaces) 592 + { 593 + struct ctl_table_header *head; 594 + struct ctl_table_root *root; 595 + struct ctl_table_set *set; 596 + struct ctl_table *entry; 597 + struct ctl_dir *dir; 598 + int ret; 599 + 600 + ret = 0; 601 + spin_lock(&sysctl_lock); 602 + root = (*pentry)->data; 603 + set = lookup_header_set(root, namespaces); 604 + dir = xlate_dir(set, (*phead)->parent); 605 + if (IS_ERR(dir)) 606 + ret = PTR_ERR(dir); 607 + else { 608 + const char *procname = (*pentry)->procname; 609 + head = NULL; 610 + entry = find_entry(&head, dir, procname, strlen(procname)); 611 + ret = -ENOENT; 612 + if (entry && use_table(head)) { 613 + unuse_table(*phead); 614 + *phead = head; 615 + *pentry = entry; 616 + ret = 0; 617 + } 618 + } 619 + 620 + spin_unlock(&sysctl_lock); 621 + return ret; 622 + } 623 + 624 + static int 
sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) 625 + { 626 + struct va_format vaf; 627 + va_list args; 628 + 629 + va_start(args, fmt); 630 + vaf.fmt = fmt; 631 + vaf.va = &args; 632 + 633 + printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", 634 + path, table->procname, &vaf); 635 + 636 + va_end(args); 637 + return -EINVAL; 638 + } 639 + 640 + static int sysctl_check_table(const char *path, struct ctl_table *table) 641 + { 642 + int err = 0; 643 + for (; table->procname; table++) { 644 + if (table->child) 645 + err = sysctl_err(path, table, "Not a file"); 646 + 647 + if ((table->proc_handler == proc_dostring) || 648 + (table->proc_handler == proc_dointvec) || 649 + (table->proc_handler == proc_dointvec_minmax) || 650 + (table->proc_handler == proc_dointvec_jiffies) || 651 + (table->proc_handler == proc_dointvec_userhz_jiffies) || 652 + (table->proc_handler == proc_dointvec_ms_jiffies) || 653 + (table->proc_handler == proc_doulongvec_minmax) || 654 + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 655 + if (!table->data) 656 + err = sysctl_err(path, table, "No data"); 657 + if (!table->maxlen) 658 + err = sysctl_err(path, table, "No maxlen"); 659 + } 660 + if (!table->proc_handler) 661 + err = sysctl_err(path, table, "No proc_handler"); 662 + 663 + if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) 664 + err = sysctl_err(path, table, "bogus .mode 0%o", 665 + table->mode); 666 + } 667 + return err; 668 + } 669 + 670 + static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, 671 + struct ctl_table_root *link_root) 672 + { 673 + struct ctl_table *link_table, *entry, *link; 674 + struct ctl_table_header *links; 675 + struct ctl_node *node; 676 + char *link_name; 677 + int nr_entries, name_bytes; 678 + 679 + name_bytes = 0; 680 + nr_entries = 0; 681 + for (entry = table; entry->procname; entry++) { 682 + nr_entries++; 683 + name_bytes += strlen(entry->procname) + 1; 684 + } 685 + 686 + links = 
kzalloc(sizeof(struct ctl_table_header) + 687 + sizeof(struct ctl_node)*nr_entries + 688 + sizeof(struct ctl_table)*(nr_entries + 1) + 689 + name_bytes, 690 + GFP_KERNEL); 691 + 692 + if (!links) 693 + return NULL; 694 + 695 + node = (struct ctl_node *)(links + 1); 696 + link_table = (struct ctl_table *)(node + nr_entries); 697 + link_name = (char *)&link_table[nr_entries + 1]; 698 + 699 + for (link = link_table, entry = table; entry->procname; link++, entry++) { 700 + int len = strlen(entry->procname) + 1; 701 + memcpy(link_name, entry->procname, len); 702 + link->procname = link_name; 703 + link->mode = S_IFLNK|S_IRWXUGO; 704 + link->data = link_root; 705 + link_name += len; 706 + } 707 + init_header(links, dir->header.root, dir->header.set, node, link_table); 708 + links->nreg = nr_entries; 709 + 710 + return links; 711 + } 712 + 713 + static bool get_links(struct ctl_dir *dir, 714 + struct ctl_table *table, struct ctl_table_root *link_root) 715 + { 716 + struct ctl_table_header *head; 717 + struct ctl_table *entry, *link; 718 + 719 + /* Are there links available for every entry in table? */ 720 + for (entry = table; entry->procname; entry++) { 721 + const char *procname = entry->procname; 722 + link = find_entry(&head, dir, procname, strlen(procname)); 723 + if (!link) 724 + return false; 725 + if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) 726 + continue; 727 + if (S_ISLNK(link->mode) && (link->data == link_root)) 728 + continue; 729 + return false; 730 + } 731 + 732 + /* The checks passed. 
Increase the registration count on the links */ 733 + for (entry = table; entry->procname; entry++) { 734 + const char *procname = entry->procname; 735 + link = find_entry(&head, dir, procname, strlen(procname)); 736 + head->nreg++; 737 + } 738 + return true; 739 + } 740 + 741 + static int insert_links(struct ctl_table_header *head) 742 + { 743 + struct ctl_table_set *root_set = &sysctl_table_root.default_set; 744 + struct ctl_dir *core_parent = NULL; 745 + struct ctl_table_header *links; 746 + int err; 747 + 748 + if (head->set == root_set) 749 + return 0; 750 + 751 + core_parent = xlate_dir(root_set, head->parent); 752 + if (IS_ERR(core_parent)) 753 + return 0; 754 + 755 + if (get_links(core_parent, head->ctl_table, head->root)) 756 + return 0; 757 + 758 + core_parent->header.nreg++; 759 + spin_unlock(&sysctl_lock); 760 + 761 + links = new_links(core_parent, head->ctl_table, head->root); 762 + 763 + spin_lock(&sysctl_lock); 764 + err = -ENOMEM; 765 + if (!links) 766 + goto out; 767 + 768 + err = 0; 769 + if (get_links(core_parent, head->ctl_table, head->root)) { 770 + kfree(links); 771 + goto out; 772 + } 773 + 774 + err = insert_header(core_parent, links); 775 + if (err) 776 + kfree(links); 777 + out: 778 + drop_sysctl_table(&core_parent->header); 779 + return err; 780 + } 781 + 782 + /** 783 + * __register_sysctl_table - register a leaf sysctl table 784 + * @set: Sysctl tree to register on 785 + * @path: The path to the directory the sysctl table is in. 786 + * @table: the top-level table structure 787 + * 788 + * Register a sysctl table hierarchy. @table should be a filled in ctl_table 789 + * array. A completely 0 filled entry terminates the table. 790 + * 791 + * The members of the &struct ctl_table structure are used as follows: 792 + * 793 + * procname - the name of the sysctl file under /proc/sys. 
Set to %NULL to not 794 + * enter a sysctl file 795 + * 796 + * data - a pointer to data for use by proc_handler 797 + * 798 + * maxlen - the maximum size in bytes of the data 799 + * 800 + * mode - the file permissions for the /proc/sys file 801 + * 802 + * child - must be %NULL. 803 + * 804 + * proc_handler - the text handler routine (described below) 805 + * 806 + * extra1, extra2 - extra pointers usable by the proc handler routines 807 + * 808 + * Leaf nodes in the sysctl tree will be represented by a single file 809 + * under /proc; non-leaf nodes will be represented by directories. 810 + * 811 + * There must be a proc_handler routine for any terminal nodes. 812 + * Several default handlers are available to cover common cases - 813 + * 814 + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 815 + * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 816 + * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 817 + * 818 + * It is the handler's job to read the input buffer from user memory 819 + * and process it. The handler should return 0 on success. 820 + * 821 + * This routine returns %NULL on a failure to register, and a pointer 822 + * to the table header on success. 
823 + */ 824 + struct ctl_table_header *__register_sysctl_table( 825 + struct ctl_table_set *set, 826 + const char *path, struct ctl_table *table) 827 + { 828 + struct ctl_table_root *root = set->dir.header.root; 829 + struct ctl_table_header *header; 830 + const char *name, *nextname; 831 + struct ctl_dir *dir; 832 + struct ctl_table *entry; 833 + struct ctl_node *node; 834 + int nr_entries = 0; 835 + 836 + for (entry = table; entry->procname; entry++) 837 + nr_entries++; 838 + 839 + header = kzalloc(sizeof(struct ctl_table_header) + 840 + sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); 841 + if (!header) 842 + return NULL; 843 + 844 + node = (struct ctl_node *)(header + 1); 845 + init_header(header, root, set, node, table); 846 + if (sysctl_check_table(path, table)) 847 + goto fail; 848 + 849 + spin_lock(&sysctl_lock); 850 + dir = &set->dir; 851 + /* Reference moved down the diretory tree get_subdir */ 852 + dir->header.nreg++; 853 + spin_unlock(&sysctl_lock); 854 + 855 + /* Find the directory for the ctl_table */ 856 + for (name = path; name; name = nextname) { 857 + int namelen; 858 + nextname = strchr(name, '/'); 859 + if (nextname) { 860 + namelen = nextname - name; 861 + nextname++; 862 + } else { 863 + namelen = strlen(name); 864 + } 865 + if (namelen == 0) 866 + continue; 867 + 868 + dir = get_subdir(dir, name, namelen); 869 + if (IS_ERR(dir)) 870 + goto fail; 871 + } 872 + 873 + spin_lock(&sysctl_lock); 874 + if (insert_header(dir, header)) 875 + goto fail_put_dir_locked; 876 + 877 + drop_sysctl_table(&dir->header); 878 + spin_unlock(&sysctl_lock); 879 + 880 + return header; 881 + 882 + fail_put_dir_locked: 883 + drop_sysctl_table(&dir->header); 884 + spin_unlock(&sysctl_lock); 885 + fail: 886 + kfree(header); 887 + dump_stack(); 888 + return NULL; 889 + } 890 + 891 + /** 892 + * register_sysctl - register a sysctl table 893 + * @path: The path to the directory the sysctl table is in. 
894 + * @table: the table structure 895 + * 896 + * Register a sysctl table. @table should be a filled in ctl_table 897 + * array. A completely 0 filled entry terminates the table. 898 + * 899 + * See __register_sysctl_table for more details. 900 + */ 901 + struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) 902 + { 903 + return __register_sysctl_table(&sysctl_table_root.default_set, 904 + path, table); 905 + } 906 + EXPORT_SYMBOL(register_sysctl); 907 + 908 + static char *append_path(const char *path, char *pos, const char *name) 909 + { 910 + int namelen; 911 + namelen = strlen(name); 912 + if (((pos - path) + namelen + 2) >= PATH_MAX) 913 + return NULL; 914 + memcpy(pos, name, namelen); 915 + pos[namelen] = '/'; 916 + pos[namelen + 1] = '\0'; 917 + pos += namelen + 1; 918 + return pos; 919 + } 920 + 921 + static int count_subheaders(struct ctl_table *table) 922 + { 923 + int has_files = 0; 924 + int nr_subheaders = 0; 925 + struct ctl_table *entry; 926 + 927 + /* special case: no directory and empty directory */ 928 + if (!table || !table->procname) 929 + return 1; 930 + 931 + for (entry = table; entry->procname; entry++) { 932 + if (entry->child) 933 + nr_subheaders += count_subheaders(entry->child); 934 + else 935 + has_files = 1; 936 + } 937 + return nr_subheaders + has_files; 938 + } 939 + 940 + static int register_leaf_sysctl_tables(const char *path, char *pos, 941 + struct ctl_table_header ***subheader, struct ctl_table_set *set, 942 + struct ctl_table *table) 943 + { 944 + struct ctl_table *ctl_table_arg = NULL; 945 + struct ctl_table *entry, *files; 946 + int nr_files = 0; 947 + int nr_dirs = 0; 948 + int err = -ENOMEM; 949 + 950 + for (entry = table; entry->procname; entry++) { 951 + if (entry->child) 952 + nr_dirs++; 953 + else 954 + nr_files++; 955 + } 956 + 957 + files = table; 958 + /* If there are mixed files and directories we need a new table */ 959 + if (nr_dirs && nr_files) { 960 + struct ctl_table *new; 961 + 
files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), 962 + GFP_KERNEL); 963 + if (!files) 964 + goto out; 965 + 966 + ctl_table_arg = files; 967 + for (new = files, entry = table; entry->procname; entry++) { 968 + if (entry->child) 969 + continue; 970 + *new = *entry; 971 + new++; 972 + } 973 + } 974 + 975 + /* Register everything except a directory full of subdirectories */ 976 + if (nr_files || !nr_dirs) { 977 + struct ctl_table_header *header; 978 + header = __register_sysctl_table(set, path, files); 979 + if (!header) { 980 + kfree(ctl_table_arg); 981 + goto out; 982 + } 983 + 984 + /* Remember if we need to free the file table */ 985 + header->ctl_table_arg = ctl_table_arg; 986 + **subheader = header; 987 + (*subheader)++; 988 + } 989 + 990 + /* Recurse into the subdirectories. */ 991 + for (entry = table; entry->procname; entry++) { 992 + char *child_pos; 993 + 994 + if (!entry->child) 995 + continue; 996 + 997 + err = -ENAMETOOLONG; 998 + child_pos = append_path(path, pos, entry->procname); 999 + if (!child_pos) 1000 + goto out; 1001 + 1002 + err = register_leaf_sysctl_tables(path, child_pos, subheader, 1003 + set, entry->child); 1004 + pos[0] = '\0'; 1005 + if (err) 1006 + goto out; 1007 + } 1008 + err = 0; 1009 + out: 1010 + /* On failure our caller will unregister all registered subheaders */ 1011 + return err; 1012 + } 1013 + 1014 + /** 1015 + * __register_sysctl_paths - register a sysctl table hierarchy 1016 + * @set: Sysctl tree to register on 1017 + * @path: The path to the directory the sysctl table is in. 1018 + * @table: the top-level table structure 1019 + * 1020 + * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1021 + * array. A completely 0 filled entry terminates the table. 1022 + * 1023 + * See __register_sysctl_table for more details. 
1024 + */ 1025 + struct ctl_table_header *__register_sysctl_paths( 1026 + struct ctl_table_set *set, 1027 + const struct ctl_path *path, struct ctl_table *table) 1028 + { 1029 + struct ctl_table *ctl_table_arg = table; 1030 + int nr_subheaders = count_subheaders(table); 1031 + struct ctl_table_header *header = NULL, **subheaders, **subheader; 1032 + const struct ctl_path *component; 1033 + char *new_path, *pos; 1034 + 1035 + pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); 1036 + if (!new_path) 1037 + return NULL; 1038 + 1039 + pos[0] = '\0'; 1040 + for (component = path; component->procname; component++) { 1041 + pos = append_path(new_path, pos, component->procname); 1042 + if (!pos) 1043 + goto out; 1044 + } 1045 + while (table->procname && table->child && !table[1].procname) { 1046 + pos = append_path(new_path, pos, table->procname); 1047 + if (!pos) 1048 + goto out; 1049 + table = table->child; 1050 + } 1051 + if (nr_subheaders == 1) { 1052 + header = __register_sysctl_table(set, new_path, table); 1053 + if (header) 1054 + header->ctl_table_arg = ctl_table_arg; 1055 + } else { 1056 + header = kzalloc(sizeof(*header) + 1057 + sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); 1058 + if (!header) 1059 + goto out; 1060 + 1061 + subheaders = (struct ctl_table_header **) (header + 1); 1062 + subheader = subheaders; 1063 + header->ctl_table_arg = ctl_table_arg; 1064 + 1065 + if (register_leaf_sysctl_tables(new_path, pos, &subheader, 1066 + set, table)) 1067 + goto err_register_leaves; 1068 + } 1069 + 1070 + out: 1071 + kfree(new_path); 1072 + return header; 1073 + 1074 + err_register_leaves: 1075 + while (subheader > subheaders) { 1076 + struct ctl_table_header *subh = *(--subheader); 1077 + struct ctl_table *table = subh->ctl_table_arg; 1078 + unregister_sysctl_table(subh); 1079 + kfree(table); 1080 + } 1081 + kfree(header); 1082 + header = NULL; 1083 + goto out; 1084 + } 1085 + 1086 + /** 1087 + * register_sysctl_table_path - register a sysctl table hierarchy 1088 + * 
@path: The path to the directory the sysctl table is in. 1089 + * @table: the top-level table structure 1090 + * 1091 + * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1092 + * array. A completely 0 filled entry terminates the table. 1093 + * 1094 + * See __register_sysctl_paths for more details. 1095 + */ 1096 + struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1097 + struct ctl_table *table) 1098 + { 1099 + return __register_sysctl_paths(&sysctl_table_root.default_set, 1100 + path, table); 1101 + } 1102 + EXPORT_SYMBOL(register_sysctl_paths); 1103 + 1104 + /** 1105 + * register_sysctl_table - register a sysctl table hierarchy 1106 + * @table: the top-level table structure 1107 + * 1108 + * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1109 + * array. A completely 0 filled entry terminates the table. 1110 + * 1111 + * See register_sysctl_paths for more details. 1112 + */ 1113 + struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1114 + { 1115 + static const struct ctl_path null_path[] = { {} }; 1116 + 1117 + return register_sysctl_paths(null_path, table); 1118 + } 1119 + EXPORT_SYMBOL(register_sysctl_table); 1120 + 1121 + static void put_links(struct ctl_table_header *header) 1122 + { 1123 + struct ctl_table_set *root_set = &sysctl_table_root.default_set; 1124 + struct ctl_table_root *root = header->root; 1125 + struct ctl_dir *parent = header->parent; 1126 + struct ctl_dir *core_parent; 1127 + struct ctl_table *entry; 1128 + 1129 + if (header->set == root_set) 1130 + return; 1131 + 1132 + core_parent = xlate_dir(root_set, parent); 1133 + if (IS_ERR(core_parent)) 1134 + return; 1135 + 1136 + for (entry = header->ctl_table; entry->procname; entry++) { 1137 + struct ctl_table_header *link_head; 1138 + struct ctl_table *link; 1139 + const char *name = entry->procname; 1140 + 1141 + link = find_entry(&link_head, core_parent, name, strlen(name)); 1142 + if (link && 
1143 + ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || 1144 + (S_ISLNK(link->mode) && (link->data == root)))) { 1145 + drop_sysctl_table(link_head); 1146 + } 1147 + else { 1148 + printk(KERN_ERR "sysctl link missing during unregister: "); 1149 + sysctl_print_dir(parent); 1150 + printk(KERN_CONT "/%s\n", name); 1151 + } 1152 + } 1153 + } 1154 + 1155 + static void drop_sysctl_table(struct ctl_table_header *header) 1156 + { 1157 + struct ctl_dir *parent = header->parent; 1158 + 1159 + if (--header->nreg) 1160 + return; 1161 + 1162 + put_links(header); 1163 + start_unregistering(header); 1164 + if (!--header->count) 1165 + kfree_rcu(header, rcu); 1166 + 1167 + if (parent) 1168 + drop_sysctl_table(&parent->header); 1169 + } 1170 + 1171 + /** 1172 + * unregister_sysctl_table - unregister a sysctl table hierarchy 1173 + * @header: the header returned from register_sysctl_table 1174 + * 1175 + * Unregisters the sysctl table and all children. proc entries may not 1176 + * actually be removed until they are no longer used by anyone. 
1177 + */ 1178 + void unregister_sysctl_table(struct ctl_table_header * header) 1179 + { 1180 + int nr_subheaders; 1181 + might_sleep(); 1182 + 1183 + if (header == NULL) 1184 + return; 1185 + 1186 + nr_subheaders = count_subheaders(header->ctl_table_arg); 1187 + if (unlikely(nr_subheaders > 1)) { 1188 + struct ctl_table_header **subheaders; 1189 + int i; 1190 + 1191 + subheaders = (struct ctl_table_header **)(header + 1); 1192 + for (i = nr_subheaders -1; i >= 0; i--) { 1193 + struct ctl_table_header *subh = subheaders[i]; 1194 + struct ctl_table *table = subh->ctl_table_arg; 1195 + unregister_sysctl_table(subh); 1196 + kfree(table); 1197 + } 1198 + kfree(header); 1199 + return; 1200 + } 1201 + 1202 + spin_lock(&sysctl_lock); 1203 + drop_sysctl_table(header); 1204 + spin_unlock(&sysctl_lock); 1205 + } 1206 + EXPORT_SYMBOL(unregister_sysctl_table); 1207 + 1208 + void setup_sysctl_set(struct ctl_table_set *set, 1209 + struct ctl_table_root *root, 1210 + int (*is_seen)(struct ctl_table_set *)) 1211 + { 1212 + memset(set, 0, sizeof(*set)); 1213 + set->is_seen = is_seen; 1214 + init_header(&set->dir.header, root, set, NULL, root_table); 1215 + } 1216 + 1217 + void retire_sysctl_set(struct ctl_table_set *set) 1218 + { 1219 + WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); 1220 + } 1221 + 849 1222 int __init proc_sys_init(void) 850 1223 { 851 1224 struct proc_dir_entry *proc_sys_root; ··· 1601 480 proc_sys_root->proc_iops = &proc_sys_dir_operations; 1602 481 proc_sys_root->proc_fops = &proc_sys_dir_file_operations; 1603 482 proc_sys_root->nlink = 0; 1604 - return 0; 483 + 484 + return sysctl_init(); 1605 485 }
+67 -39
include/linux/sysctl.h
··· 932 932 #include <linux/list.h> 933 933 #include <linux/rcupdate.h> 934 934 #include <linux/wait.h> 935 + #include <linux/rbtree.h> 935 936 936 937 /* For the /proc/sys support */ 937 938 struct ctl_table; 938 939 struct nsproxy; 939 940 struct ctl_table_root; 940 - 941 - struct ctl_table_set { 942 - struct list_head list; 943 - struct ctl_table_set *parent; 944 - int (*is_seen)(struct ctl_table_set *); 945 - }; 946 - 947 - extern void setup_sysctl_set(struct ctl_table_set *p, 948 - struct ctl_table_set *parent, 949 - int (*is_seen)(struct ctl_table_set *)); 950 - 951 941 struct ctl_table_header; 952 - 953 - extern void sysctl_head_get(struct ctl_table_header *); 954 - extern void sysctl_head_put(struct ctl_table_header *); 955 - extern int sysctl_is_seen(struct ctl_table_header *); 956 - extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); 957 - extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); 958 - extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, 959 - struct ctl_table_header *prev); 960 - extern void sysctl_head_finish(struct ctl_table_header *prev); 961 - extern int sysctl_perm(struct ctl_table_root *root, 962 - struct ctl_table *table, int op); 942 + struct ctl_dir; 963 943 964 944 typedef struct ctl_table ctl_table; 965 945 ··· 1003 1023 return (void *)(unsigned long)atomic_read(&poll->event); 1004 1024 } 1005 1025 1006 - void proc_sys_poll_notify(struct ctl_table_poll *poll); 1007 - 1008 1026 #define __CTL_TABLE_POLL_INITIALIZER(name) { \ 1009 1027 .event = ATOMIC_INIT(0), \ 1010 1028 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } ··· 1017 1039 void *data; 1018 1040 int maxlen; 1019 1041 umode_t mode; 1020 - struct ctl_table *child; 1021 - struct ctl_table *parent; /* Automatically set */ 1042 + struct ctl_table *child; /* Deprecated */ 1022 1043 proc_handler *proc_handler; /* Callback for text formatting */ 1023 1044 struct ctl_table_poll *poll; 1024 1045 void 
*extra1; 1025 1046 void *extra2; 1026 1047 }; 1027 1048 1028 - struct ctl_table_root { 1029 - struct list_head root_list; 1030 - struct ctl_table_set default_set; 1031 - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, 1032 - struct nsproxy *namespaces); 1033 - int (*permissions)(struct ctl_table_root *root, 1034 - struct nsproxy *namespaces, struct ctl_table *table); 1049 + struct ctl_node { 1050 + struct rb_node node; 1051 + struct ctl_table_header *header; 1035 1052 }; 1036 1053 1037 1054 /* struct ctl_table_header is used to maintain dynamic lists of ··· 1036 1063 union { 1037 1064 struct { 1038 1065 struct ctl_table *ctl_table; 1039 - struct list_head ctl_entry; 1040 1066 int used; 1041 1067 int count; 1068 + int nreg; 1042 1069 }; 1043 1070 struct rcu_head rcu; 1044 1071 }; ··· 1046 1073 struct ctl_table *ctl_table_arg; 1047 1074 struct ctl_table_root *root; 1048 1075 struct ctl_table_set *set; 1049 - struct ctl_table *attached_by; 1050 - struct ctl_table *attached_to; 1051 - struct ctl_table_header *parent; 1076 + struct ctl_dir *parent; 1077 + struct ctl_node *node; 1078 + }; 1079 + 1080 + struct ctl_dir { 1081 + /* Header must be at the start of ctl_dir */ 1082 + struct ctl_table_header header; 1083 + struct rb_root root; 1084 + }; 1085 + 1086 + struct ctl_table_set { 1087 + int (*is_seen)(struct ctl_table_set *); 1088 + struct ctl_dir dir; 1089 + }; 1090 + 1091 + struct ctl_table_root { 1092 + struct ctl_table_set default_set; 1093 + struct ctl_table_set *(*lookup)(struct ctl_table_root *root, 1094 + struct nsproxy *namespaces); 1095 + int (*permissions)(struct ctl_table_root *root, 1096 + struct nsproxy *namespaces, struct ctl_table *table); 1052 1097 }; 1053 1098 1054 1099 /* struct ctl_path describes where in the hierarchy a table is added */ ··· 1074 1083 const char *procname; 1075 1084 }; 1076 1085 1086 + #ifdef CONFIG_SYSCTL 1087 + 1088 + void proc_sys_poll_notify(struct ctl_table_poll *poll); 1089 + 1090 + extern void 
setup_sysctl_set(struct ctl_table_set *p, 1091 + struct ctl_table_root *root, 1092 + int (*is_seen)(struct ctl_table_set *)); 1093 + extern void retire_sysctl_set(struct ctl_table_set *set); 1094 + 1077 1095 void register_sysctl_root(struct ctl_table_root *root); 1096 + struct ctl_table_header *__register_sysctl_table( 1097 + struct ctl_table_set *set, 1098 + const char *path, struct ctl_table *table); 1078 1099 struct ctl_table_header *__register_sysctl_paths( 1079 - struct ctl_table_root *root, struct nsproxy *namespaces, 1100 + struct ctl_table_set *set, 1080 1101 const struct ctl_path *path, struct ctl_table *table); 1102 + struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); 1081 1103 struct ctl_table_header *register_sysctl_table(struct ctl_table * table); 1082 1104 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1083 1105 struct ctl_table *table); 1084 1106 1085 1107 void unregister_sysctl_table(struct ctl_table_header * table); 1086 - int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); 1108 + 1109 + extern int sysctl_init(void); 1110 + #else /* CONFIG_SYSCTL */ 1111 + static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) 1112 + { 1113 + return NULL; 1114 + } 1115 + 1116 + static inline struct ctl_table_header *register_sysctl_paths( 1117 + const struct ctl_path *path, struct ctl_table *table) 1118 + { 1119 + return NULL; 1120 + } 1121 + 1122 + static inline void unregister_sysctl_table(struct ctl_table_header * table) 1123 + { 1124 + } 1125 + 1126 + static inline void setup_sysctl_set(struct ctl_table_set *p, 1127 + struct ctl_table_root *root, 1128 + int (*is_seen)(struct ctl_table_set *)) 1129 + { 1130 + } 1131 + 1132 + #endif /* CONFIG_SYSCTL */ 1087 1133 1088 1134 #endif /* __KERNEL__ */ 1089 1135
-1
kernel/Makefile
··· 27 27 28 28 obj-$(CONFIG_FREEZER) += freezer.o 29 29 obj-$(CONFIG_PROFILING) += profile.o 30 - obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 31 30 obj-$(CONFIG_STACKTRACE) += stacktrace.o 32 31 obj-y += time/ 33 32 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
+3 -498
kernel/sysctl.c
··· 193 193 194 194 #endif 195 195 196 - static struct ctl_table root_table[]; 197 - static struct ctl_table_root sysctl_table_root; 198 - static struct ctl_table_header root_table_header = { 199 - {{.count = 1, 200 - .ctl_table = root_table, 201 - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, 202 - .root = &sysctl_table_root, 203 - .set = &sysctl_table_root.default_set, 204 - }; 205 - static struct ctl_table_root sysctl_table_root = { 206 - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), 207 - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), 208 - }; 209 - 210 196 static struct ctl_table kern_table[]; 211 197 static struct ctl_table vm_table[]; 212 198 static struct ctl_table fs_table[]; ··· 209 223 210 224 /* The default sysctl tables: */ 211 225 212 - static struct ctl_table root_table[] = { 226 + static struct ctl_table sysctl_base_table[] = { 213 227 { 214 228 .procname = "kernel", 215 229 .mode = 0555, ··· 1546 1560 { } 1547 1561 }; 1548 1562 1549 - static DEFINE_SPINLOCK(sysctl_lock); 1550 - 1551 - /* called under sysctl_lock */ 1552 - static int use_table(struct ctl_table_header *p) 1563 + int __init sysctl_init(void) 1553 1564 { 1554 - if (unlikely(p->unregistering)) 1555 - return 0; 1556 - p->used++; 1557 - return 1; 1558 - } 1559 - 1560 - /* called under sysctl_lock */ 1561 - static void unuse_table(struct ctl_table_header *p) 1562 - { 1563 - if (!--p->used) 1564 - if (unlikely(p->unregistering)) 1565 - complete(p->unregistering); 1566 - } 1567 - 1568 - /* called under sysctl_lock, will reacquire if has to wait */ 1569 - static void start_unregistering(struct ctl_table_header *p) 1570 - { 1571 - /* 1572 - * if p->used is 0, nobody will ever touch that entry again; 1573 - * we'll eliminate all paths to it before dropping sysctl_lock 1574 - */ 1575 - if (unlikely(p->used)) { 1576 - struct completion wait; 1577 - init_completion(&wait); 1578 - p->unregistering = &wait; 1579 - spin_unlock(&sysctl_lock); 1580 - 
wait_for_completion(&wait); 1581 - spin_lock(&sysctl_lock); 1582 - } else { 1583 - /* anything non-NULL; we'll never dereference it */ 1584 - p->unregistering = ERR_PTR(-EINVAL); 1585 - } 1586 - /* 1587 - * do not remove from the list until nobody holds it; walking the 1588 - * list in do_sysctl() relies on that. 1589 - */ 1590 - list_del_init(&p->ctl_entry); 1591 - } 1592 - 1593 - void sysctl_head_get(struct ctl_table_header *head) 1594 - { 1595 - spin_lock(&sysctl_lock); 1596 - head->count++; 1597 - spin_unlock(&sysctl_lock); 1598 - } 1599 - 1600 - void sysctl_head_put(struct ctl_table_header *head) 1601 - { 1602 - spin_lock(&sysctl_lock); 1603 - if (!--head->count) 1604 - kfree_rcu(head, rcu); 1605 - spin_unlock(&sysctl_lock); 1606 - } 1607 - 1608 - struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 1609 - { 1610 - if (!head) 1611 - BUG(); 1612 - spin_lock(&sysctl_lock); 1613 - if (!use_table(head)) 1614 - head = ERR_PTR(-ENOENT); 1615 - spin_unlock(&sysctl_lock); 1616 - return head; 1617 - } 1618 - 1619 - void sysctl_head_finish(struct ctl_table_header *head) 1620 - { 1621 - if (!head) 1622 - return; 1623 - spin_lock(&sysctl_lock); 1624 - unuse_table(head); 1625 - spin_unlock(&sysctl_lock); 1626 - } 1627 - 1628 - static struct ctl_table_set * 1629 - lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) 1630 - { 1631 - struct ctl_table_set *set = &root->default_set; 1632 - if (root->lookup) 1633 - set = root->lookup(root, namespaces); 1634 - return set; 1635 - } 1636 - 1637 - static struct list_head * 1638 - lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) 1639 - { 1640 - struct ctl_table_set *set = lookup_header_set(root, namespaces); 1641 - return &set->list; 1642 - } 1643 - 1644 - struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, 1645 - struct ctl_table_header *prev) 1646 - { 1647 - struct ctl_table_root *root; 1648 - struct list_head *header_list; 1649 - struct 
ctl_table_header *head; 1650 - struct list_head *tmp; 1651 - 1652 - spin_lock(&sysctl_lock); 1653 - if (prev) { 1654 - head = prev; 1655 - tmp = &prev->ctl_entry; 1656 - unuse_table(prev); 1657 - goto next; 1658 - } 1659 - tmp = &root_table_header.ctl_entry; 1660 - for (;;) { 1661 - head = list_entry(tmp, struct ctl_table_header, ctl_entry); 1662 - 1663 - if (!use_table(head)) 1664 - goto next; 1665 - spin_unlock(&sysctl_lock); 1666 - return head; 1667 - next: 1668 - root = head->root; 1669 - tmp = tmp->next; 1670 - header_list = lookup_header_list(root, namespaces); 1671 - if (tmp != header_list) 1672 - continue; 1673 - 1674 - do { 1675 - root = list_entry(root->root_list.next, 1676 - struct ctl_table_root, root_list); 1677 - if (root == &sysctl_table_root) 1678 - goto out; 1679 - header_list = lookup_header_list(root, namespaces); 1680 - } while (list_empty(header_list)); 1681 - tmp = header_list->next; 1682 - } 1683 - out: 1684 - spin_unlock(&sysctl_lock); 1685 - return NULL; 1686 - } 1687 - 1688 - struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) 1689 - { 1690 - return __sysctl_head_next(current->nsproxy, prev); 1691 - } 1692 - 1693 - void register_sysctl_root(struct ctl_table_root *root) 1694 - { 1695 - spin_lock(&sysctl_lock); 1696 - list_add_tail(&root->root_list, &sysctl_table_root.root_list); 1697 - spin_unlock(&sysctl_lock); 1698 - } 1699 - 1700 - /* 1701 - * sysctl_perm does NOT grant the superuser all rights automatically, because 1702 - * some sysctl variables are readonly even to root. 
1703 - */ 1704 - 1705 - static int test_perm(int mode, int op) 1706 - { 1707 - if (!current_euid()) 1708 - mode >>= 6; 1709 - else if (in_egroup_p(0)) 1710 - mode >>= 3; 1711 - if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) 1712 - return 0; 1713 - return -EACCES; 1714 - } 1715 - 1716 - int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) 1717 - { 1718 - int mode; 1719 - 1720 - if (root->permissions) 1721 - mode = root->permissions(root, current->nsproxy, table); 1722 - else 1723 - mode = table->mode; 1724 - 1725 - return test_perm(mode, op); 1726 - } 1727 - 1728 - static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) 1729 - { 1730 - for (; table->procname; table++) { 1731 - table->parent = parent; 1732 - if (table->child) 1733 - sysctl_set_parent(table, table->child); 1734 - } 1735 - } 1736 - 1737 - static __init int sysctl_init(void) 1738 - { 1739 - sysctl_set_parent(NULL, root_table); 1740 - #ifdef CONFIG_SYSCTL_SYSCALL_CHECK 1741 - sysctl_check_table(current->nsproxy, root_table); 1742 - #endif 1565 + register_sysctl_table(sysctl_base_table); 1743 1566 return 0; 1744 - } 1745 - 1746 - core_initcall(sysctl_init); 1747 - 1748 - static struct ctl_table *is_branch_in(struct ctl_table *branch, 1749 - struct ctl_table *table) 1750 - { 1751 - struct ctl_table *p; 1752 - const char *s = branch->procname; 1753 - 1754 - /* branch should have named subdirectory as its first element */ 1755 - if (!s || !branch->child) 1756 - return NULL; 1757 - 1758 - /* ... 
and nothing else */ 1759 - if (branch[1].procname) 1760 - return NULL; 1761 - 1762 - /* table should contain subdirectory with the same name */ 1763 - for (p = table; p->procname; p++) { 1764 - if (!p->child) 1765 - continue; 1766 - if (p->procname && strcmp(p->procname, s) == 0) 1767 - return p; 1768 - } 1769 - return NULL; 1770 - } 1771 - 1772 - /* see if attaching q to p would be an improvement */ 1773 - static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) 1774 - { 1775 - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; 1776 - struct ctl_table *next; 1777 - int is_better = 0; 1778 - int not_in_parent = !p->attached_by; 1779 - 1780 - while ((next = is_branch_in(by, to)) != NULL) { 1781 - if (by == q->attached_by) 1782 - is_better = 1; 1783 - if (to == p->attached_by) 1784 - not_in_parent = 1; 1785 - by = by->child; 1786 - to = next->child; 1787 - } 1788 - 1789 - if (is_better && not_in_parent) { 1790 - q->attached_by = by; 1791 - q->attached_to = to; 1792 - q->parent = p; 1793 - } 1794 - } 1795 - 1796 - /** 1797 - * __register_sysctl_paths - register a sysctl hierarchy 1798 - * @root: List of sysctl headers to register on 1799 - * @namespaces: Data to compute which lists of sysctl entries are visible 1800 - * @path: The path to the directory the sysctl table is in. 1801 - * @table: the top-level table structure 1802 - * 1803 - * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1804 - * array. A completely 0 filled entry terminates the table. 1805 - * 1806 - * The members of the &struct ctl_table structure are used as follows: 1807 - * 1808 - * procname - the name of the sysctl file under /proc/sys. 
Set to %NULL to not 1809 - * enter a sysctl file 1810 - * 1811 - * data - a pointer to data for use by proc_handler 1812 - * 1813 - * maxlen - the maximum size in bytes of the data 1814 - * 1815 - * mode - the file permissions for the /proc/sys file, and for sysctl(2) 1816 - * 1817 - * child - a pointer to the child sysctl table if this entry is a directory, or 1818 - * %NULL. 1819 - * 1820 - * proc_handler - the text handler routine (described below) 1821 - * 1822 - * de - for internal use by the sysctl routines 1823 - * 1824 - * extra1, extra2 - extra pointers usable by the proc handler routines 1825 - * 1826 - * Leaf nodes in the sysctl tree will be represented by a single file 1827 - * under /proc; non-leaf nodes will be represented by directories. 1828 - * 1829 - * sysctl(2) can automatically manage read and write requests through 1830 - * the sysctl table. The data and maxlen fields of the ctl_table 1831 - * struct enable minimal validation of the values being written to be 1832 - * performed, and the mode field allows minimal authentication. 1833 - * 1834 - * There must be a proc_handler routine for any terminal nodes 1835 - * mirrored under /proc/sys (non-terminals are handled by a built-in 1836 - * directory handler). Several default handlers are available to 1837 - * cover common cases - 1838 - * 1839 - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), 1840 - * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 1841 - * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() 1842 - * 1843 - * It is the handler's job to read the input buffer from user memory 1844 - * and process it. The handler should return 0 on success. 1845 - * 1846 - * This routine returns %NULL on a failure to register, and a pointer 1847 - * to the table header on success. 
1848 - */ 1849 - struct ctl_table_header *__register_sysctl_paths( 1850 - struct ctl_table_root *root, 1851 - struct nsproxy *namespaces, 1852 - const struct ctl_path *path, struct ctl_table *table) 1853 - { 1854 - struct ctl_table_header *header; 1855 - struct ctl_table *new, **prevp; 1856 - unsigned int n, npath; 1857 - struct ctl_table_set *set; 1858 - 1859 - /* Count the path components */ 1860 - for (npath = 0; path[npath].procname; ++npath) 1861 - ; 1862 - 1863 - /* 1864 - * For each path component, allocate a 2-element ctl_table array. 1865 - * The first array element will be filled with the sysctl entry 1866 - * for this, the second will be the sentinel (procname == 0). 1867 - * 1868 - * We allocate everything in one go so that we don't have to 1869 - * worry about freeing additional memory in unregister_sysctl_table. 1870 - */ 1871 - header = kzalloc(sizeof(struct ctl_table_header) + 1872 - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); 1873 - if (!header) 1874 - return NULL; 1875 - 1876 - new = (struct ctl_table *) (header + 1); 1877 - 1878 - /* Now connect the dots */ 1879 - prevp = &header->ctl_table; 1880 - for (n = 0; n < npath; ++n, ++path) { 1881 - /* Copy the procname */ 1882 - new->procname = path->procname; 1883 - new->mode = 0555; 1884 - 1885 - *prevp = new; 1886 - prevp = &new->child; 1887 - 1888 - new += 2; 1889 - } 1890 - *prevp = table; 1891 - header->ctl_table_arg = table; 1892 - 1893 - INIT_LIST_HEAD(&header->ctl_entry); 1894 - header->used = 0; 1895 - header->unregistering = NULL; 1896 - header->root = root; 1897 - sysctl_set_parent(NULL, header->ctl_table); 1898 - header->count = 1; 1899 - #ifdef CONFIG_SYSCTL_SYSCALL_CHECK 1900 - if (sysctl_check_table(namespaces, header->ctl_table)) { 1901 - kfree(header); 1902 - return NULL; 1903 - } 1904 - #endif 1905 - spin_lock(&sysctl_lock); 1906 - header->set = lookup_header_set(root, namespaces); 1907 - header->attached_by = header->ctl_table; 1908 - header->attached_to = root_table; 1909 
- header->parent = &root_table_header; 1910 - for (set = header->set; set; set = set->parent) { 1911 - struct ctl_table_header *p; 1912 - list_for_each_entry(p, &set->list, ctl_entry) { 1913 - if (p->unregistering) 1914 - continue; 1915 - try_attach(p, header); 1916 - } 1917 - } 1918 - header->parent->count++; 1919 - list_add_tail(&header->ctl_entry, &header->set->list); 1920 - spin_unlock(&sysctl_lock); 1921 - 1922 - return header; 1923 - } 1924 - 1925 - /** 1926 - * register_sysctl_table_path - register a sysctl table hierarchy 1927 - * @path: The path to the directory the sysctl table is in. 1928 - * @table: the top-level table structure 1929 - * 1930 - * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1931 - * array. A completely 0 filled entry terminates the table. 1932 - * 1933 - * See __register_sysctl_paths for more details. 1934 - */ 1935 - struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 1936 - struct ctl_table *table) 1937 - { 1938 - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, 1939 - path, table); 1940 - } 1941 - 1942 - /** 1943 - * register_sysctl_table - register a sysctl table hierarchy 1944 - * @table: the top-level table structure 1945 - * 1946 - * Register a sysctl table hierarchy. @table should be a filled in ctl_table 1947 - * array. A completely 0 filled entry terminates the table. 1948 - * 1949 - * See register_sysctl_paths for more details. 1950 - */ 1951 - struct ctl_table_header *register_sysctl_table(struct ctl_table *table) 1952 - { 1953 - static const struct ctl_path null_path[] = { {} }; 1954 - 1955 - return register_sysctl_paths(null_path, table); 1956 - } 1957 - 1958 - /** 1959 - * unregister_sysctl_table - unregister a sysctl table hierarchy 1960 - * @header: the header returned from register_sysctl_table 1961 - * 1962 - * Unregisters the sysctl table and all children. proc entries may not 1963 - * actually be removed until they are no longer used by anyone. 
1964 - */ 1965 - void unregister_sysctl_table(struct ctl_table_header * header) 1966 - { 1967 - might_sleep(); 1968 - 1969 - if (header == NULL) 1970 - return; 1971 - 1972 - spin_lock(&sysctl_lock); 1973 - start_unregistering(header); 1974 - if (!--header->parent->count) { 1975 - WARN_ON(1); 1976 - kfree_rcu(header->parent, rcu); 1977 - } 1978 - if (!--header->count) 1979 - kfree_rcu(header, rcu); 1980 - spin_unlock(&sysctl_lock); 1981 - } 1982 - 1983 - int sysctl_is_seen(struct ctl_table_header *p) 1984 - { 1985 - struct ctl_table_set *set = p->set; 1986 - int res; 1987 - spin_lock(&sysctl_lock); 1988 - if (p->unregistering) 1989 - res = 0; 1990 - else if (!set->is_seen) 1991 - res = 1; 1992 - else 1993 - res = set->is_seen(set); 1994 - spin_unlock(&sysctl_lock); 1995 - return res; 1996 - } 1997 - 1998 - void setup_sysctl_set(struct ctl_table_set *p, 1999 - struct ctl_table_set *parent, 2000 - int (*is_seen)(struct ctl_table_set *)) 2001 - { 2002 - INIT_LIST_HEAD(&p->list); 2003 - p->parent = parent ? 
parent : &sysctl_table_root.default_set; 2004 - p->is_seen = is_seen; 2005 - } 2006 - 2007 - #else /* !CONFIG_SYSCTL */ 2008 - struct ctl_table_header *register_sysctl_table(struct ctl_table * table) 2009 - { 2010 - return NULL; 2011 - } 2012 - 2013 - struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, 2014 - struct ctl_table *table) 2015 - { 2016 - return NULL; 2017 - } 2018 - 2019 - void unregister_sysctl_table(struct ctl_table_header * table) 2020 - { 2021 - } 2022 - 2023 - void setup_sysctl_set(struct ctl_table_set *p, 2024 - struct ctl_table_set *parent, 2025 - int (*is_seen)(struct ctl_table_set *)) 2026 - { 2027 - } 2028 - 2029 - void sysctl_head_put(struct ctl_table_header *head) 2030 - { 2031 1567 } 2032 1568 2033 1569 #endif /* CONFIG_SYSCTL */ ··· 2517 3009 EXPORT_SYMBOL(proc_dostring); 2518 3010 EXPORT_SYMBOL(proc_doulongvec_minmax); 2519 3011 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2520 - EXPORT_SYMBOL(register_sysctl_table); 2521 - EXPORT_SYMBOL(register_sysctl_paths); 2522 - EXPORT_SYMBOL(unregister_sysctl_table);
-160
kernel/sysctl_check.c
··· 1 - #include <linux/stat.h> 2 - #include <linux/sysctl.h> 3 - #include "../fs/xfs/xfs_sysctl.h" 4 - #include <linux/sunrpc/debug.h> 5 - #include <linux/string.h> 6 - #include <net/ip_vs.h> 7 - 8 - 9 - static int sysctl_depth(struct ctl_table *table) 10 - { 11 - struct ctl_table *tmp; 12 - int depth; 13 - 14 - depth = 0; 15 - for (tmp = table; tmp->parent; tmp = tmp->parent) 16 - depth++; 17 - 18 - return depth; 19 - } 20 - 21 - static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) 22 - { 23 - int i; 24 - 25 - for (i = 0; table && i < n; i++) 26 - table = table->parent; 27 - 28 - return table; 29 - } 30 - 31 - 32 - static void sysctl_print_path(struct ctl_table *table) 33 - { 34 - struct ctl_table *tmp; 35 - int depth, i; 36 - depth = sysctl_depth(table); 37 - if (table->procname) { 38 - for (i = depth; i >= 0; i--) { 39 - tmp = sysctl_parent(table, i); 40 - printk("/%s", tmp->procname?tmp->procname:""); 41 - } 42 - } 43 - printk(" "); 44 - } 45 - 46 - static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, 47 - struct ctl_table *table) 48 - { 49 - struct ctl_table_header *head; 50 - struct ctl_table *ref, *test; 51 - int depth, cur_depth; 52 - 53 - depth = sysctl_depth(table); 54 - 55 - for (head = __sysctl_head_next(namespaces, NULL); head; 56 - head = __sysctl_head_next(namespaces, head)) { 57 - cur_depth = depth; 58 - ref = head->ctl_table; 59 - repeat: 60 - test = sysctl_parent(table, cur_depth); 61 - for (; ref->procname; ref++) { 62 - int match = 0; 63 - if (cur_depth && !ref->child) 64 - continue; 65 - 66 - if (test->procname && ref->procname && 67 - (strcmp(test->procname, ref->procname) == 0)) 68 - match++; 69 - 70 - if (match) { 71 - if (cur_depth != 0) { 72 - cur_depth--; 73 - ref = ref->child; 74 - goto repeat; 75 - } 76 - goto out; 77 - } 78 - } 79 - } 80 - ref = NULL; 81 - out: 82 - sysctl_head_finish(head); 83 - return ref; 84 - } 85 - 86 - static void set_fail(const char **fail, struct ctl_table *table, const 
char *str) 87 - { 88 - if (*fail) { 89 - printk(KERN_ERR "sysctl table check failed: "); 90 - sysctl_print_path(table); 91 - printk(" %s\n", *fail); 92 - dump_stack(); 93 - } 94 - *fail = str; 95 - } 96 - 97 - static void sysctl_check_leaf(struct nsproxy *namespaces, 98 - struct ctl_table *table, const char **fail) 99 - { 100 - struct ctl_table *ref; 101 - 102 - ref = sysctl_check_lookup(namespaces, table); 103 - if (ref && (ref != table)) 104 - set_fail(fail, table, "Sysctl already exists"); 105 - } 106 - 107 - int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) 108 - { 109 - int error = 0; 110 - for (; table->procname; table++) { 111 - const char *fail = NULL; 112 - 113 - if (table->parent) { 114 - if (!table->parent->procname) 115 - set_fail(&fail, table, "Parent without procname"); 116 - } 117 - if (table->child) { 118 - if (table->data) 119 - set_fail(&fail, table, "Directory with data?"); 120 - if (table->maxlen) 121 - set_fail(&fail, table, "Directory with maxlen?"); 122 - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) 123 - set_fail(&fail, table, "Writable sysctl directory"); 124 - if (table->proc_handler) 125 - set_fail(&fail, table, "Directory with proc_handler"); 126 - if (table->extra1) 127 - set_fail(&fail, table, "Directory with extra1"); 128 - if (table->extra2) 129 - set_fail(&fail, table, "Directory with extra2"); 130 - } else { 131 - if ((table->proc_handler == proc_dostring) || 132 - (table->proc_handler == proc_dointvec) || 133 - (table->proc_handler == proc_dointvec_minmax) || 134 - (table->proc_handler == proc_dointvec_jiffies) || 135 - (table->proc_handler == proc_dointvec_userhz_jiffies) || 136 - (table->proc_handler == proc_dointvec_ms_jiffies) || 137 - (table->proc_handler == proc_doulongvec_minmax) || 138 - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { 139 - if (!table->data) 140 - set_fail(&fail, table, "No data"); 141 - if (!table->maxlen) 142 - set_fail(&fail, table, "No maxlen"); 143 - } 
144 - #ifdef CONFIG_PROC_SYSCTL 145 - if (!table->proc_handler) 146 - set_fail(&fail, table, "No proc_handler"); 147 - #endif 148 - sysctl_check_leaf(namespaces, table, &fail); 149 - } 150 - if (table->mode > 0777) 151 - set_fail(&fail, table, "bogus .mode"); 152 - if (fail) { 153 - set_fail(&fail, table, NULL); 154 - error = -EINVAL; 155 - } 156 - if (table->child) 157 - error |= sysctl_check_table(namespaces, table->child); 158 - } 159 - return error; 160 - }
-8
lib/Kconfig.debug
··· 1141 1141 Enable this option if you want to use the LatencyTOP tool 1142 1142 to find out which userspace is blocking on what kernel operations. 1143 1143 1144 - config SYSCTL_SYSCALL_CHECK 1145 - bool "Sysctl checks" 1146 - depends on SYSCTL 1147 - ---help--- 1148 - sys_sysctl uses binary paths that have been found challenging 1149 - to properly maintain and use. This enables checks that help 1150 - you to keep things correct. 1151 - 1152 1144 source mm/Kconfig.debug 1153 1145 source kernel/trace/Kconfig 1154 1146
+9 -15
net/sysctl_net.c
··· 74 74 75 75 static int __net_init sysctl_net_init(struct net *net) 76 76 { 77 - setup_sysctl_set(&net->sysctls, 78 - &net_sysctl_ro_root.default_set, 79 - is_seen); 77 + setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); 80 78 return 0; 81 79 } 82 80 83 81 static void __net_exit sysctl_net_exit(struct net *net) 84 82 { 85 - WARN_ON(!list_empty(&net->sysctls.list)); 83 + retire_sysctl_set(&net->sysctls); 86 84 } 87 85 88 86 static struct pernet_operations sysctl_pernet_ops = { ··· 88 90 .exit = sysctl_net_exit, 89 91 }; 90 92 91 - static __init int sysctl_init(void) 93 + static __init int net_sysctl_init(void) 92 94 { 93 95 int ret; 94 96 ret = register_pernet_subsys(&sysctl_pernet_ops); 95 97 if (ret) 96 98 goto out; 97 - register_sysctl_root(&net_sysctl_root); 98 - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); 99 + setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL); 99 100 register_sysctl_root(&net_sysctl_ro_root); 101 + register_sysctl_root(&net_sysctl_root); 100 102 out: 101 103 return ret; 102 104 } 103 - subsys_initcall(sysctl_init); 105 + subsys_initcall(net_sysctl_init); 104 106 105 107 struct ctl_table_header *register_net_sysctl_table(struct net *net, 106 108 const struct ctl_path *path, struct ctl_table *table) 107 109 { 108 - struct nsproxy namespaces; 109 - namespaces = *current->nsproxy; 110 - namespaces.net_ns = net; 111 - return __register_sysctl_paths(&net_sysctl_root, 112 - &namespaces, path, table); 110 + return __register_sysctl_paths(&net->sysctls, path, table); 113 111 } 114 112 EXPORT_SYMBOL_GPL(register_net_sysctl_table); 115 113 116 114 struct ctl_table_header *register_net_sysctl_rotable(const 117 115 struct ctl_path *path, struct ctl_table *table) 118 116 { 119 - return __register_sysctl_paths(&net_sysctl_ro_root, 120 - &init_nsproxy, path, table); 117 + return __register_sysctl_paths(&net_sysctl_ro_root.default_set, 118 + path, table); 121 119 } 122 120 
EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); 123 121