Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs/proc: use a rb tree for the directory entries

When a lot of netdevices are created, one of the bottlenecks is the
creation of proc entries. This series aims to accelerate this part.

The current implementation for the directories in /proc uses a singly
linked list. This is slow when handling directories with large numbers of
entries (e.g. netdevice-related entries when lots of tunnels are opened).

This patch replaces this linked list with a red-black tree.

Here are some numbers:

dummy30000.batch contains 30 000 times 'link add type dummy'.

Before the patch:
$ time ip -b dummy30000.batch
real 2m31.950s
user 0m0.440s
sys 2m21.440s
$ time rmmod dummy
real 1m35.764s
user 0m0.000s
sys 1m24.088s

After the patch:
$ time ip -b dummy30000.batch
real 2m0.874s
user 0m0.448s
sys 1m49.720s
$ time rmmod dummy
real 1m13.988s
user 0m0.000s
sys 1m1.008s

The idea of improving this part was suggested by Thierry Herbelot.

[akpm@linux-foundation.org: initialise proc_root.subdir at compile time]
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Thierry Herbelot <thierry.herbelot@6wind.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Nicolas Dichtel and committed by
Linus Torvalds
710585d4 9edad6ea

+113 -64
+105 -59
fs/proc/generic.c
··· 31 31 32 32 static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) 33 33 { 34 - if (de->namelen != len) 35 - return 0; 36 - return !memcmp(name, de->name, len); 34 + if (len < de->namelen) 35 + return -1; 36 + if (len > de->namelen) 37 + return 1; 38 + 39 + return memcmp(name, de->name, len); 40 + } 41 + 42 + static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) 43 + { 44 + struct rb_node *node = rb_first(&dir->subdir); 45 + 46 + if (node == NULL) 47 + return NULL; 48 + 49 + return rb_entry(node, struct proc_dir_entry, subdir_node); 50 + } 51 + 52 + static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) 53 + { 54 + struct rb_node *node = rb_next(&dir->subdir_node); 55 + 56 + if (node == NULL) 57 + return NULL; 58 + 59 + return rb_entry(node, struct proc_dir_entry, subdir_node); 60 + } 61 + 62 + static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, 63 + const char *name, 64 + unsigned int len) 65 + { 66 + struct rb_node *node = dir->subdir.rb_node; 67 + 68 + while (node) { 69 + struct proc_dir_entry *de = container_of(node, 70 + struct proc_dir_entry, 71 + subdir_node); 72 + int result = proc_match(len, name, de); 73 + 74 + if (result < 0) 75 + node = node->rb_left; 76 + else if (result > 0) 77 + node = node->rb_right; 78 + else 79 + return de; 80 + } 81 + return NULL; 82 + } 83 + 84 + static bool pde_subdir_insert(struct proc_dir_entry *dir, 85 + struct proc_dir_entry *de) 86 + { 87 + struct rb_root *root = &dir->subdir; 88 + struct rb_node **new = &root->rb_node, *parent = NULL; 89 + 90 + /* Figure out where to put new node */ 91 + while (*new) { 92 + struct proc_dir_entry *this = 93 + container_of(*new, struct proc_dir_entry, subdir_node); 94 + int result = proc_match(de->namelen, de->name, this); 95 + 96 + parent = *new; 97 + if (result < 0) 98 + new = &(*new)->rb_left; 99 + else if (result > 0) 100 + new = &(*new)->rb_right; 101 + else 102 + return false; 103 + } 104 + 
105 + /* Add new node and rebalance tree. */ 106 + rb_link_node(&de->subdir_node, parent, new); 107 + rb_insert_color(&de->subdir_node, root); 108 + return true; 37 109 } 38 110 39 111 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) ··· 164 92 break; 165 93 166 94 len = next - cp; 167 - for (de = de->subdir; de ; de = de->next) { 168 - if (proc_match(len, cp, de)) 169 - break; 170 - } 95 + de = pde_subdir_find(de, cp, len); 171 96 if (!de) { 172 97 WARN(1, "name '%s'\n", name); 173 98 return -ENOENT; ··· 252 183 struct inode *inode; 253 184 254 185 spin_lock(&proc_subdir_lock); 255 - for (de = de->subdir; de ; de = de->next) { 256 - if (de->namelen != dentry->d_name.len) 257 - continue; 258 - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 259 - pde_get(de); 260 - spin_unlock(&proc_subdir_lock); 261 - inode = proc_get_inode(dir->i_sb, de); 262 - if (!inode) 263 - return ERR_PTR(-ENOMEM); 264 - d_set_d_op(dentry, &simple_dentry_operations); 265 - d_add(dentry, inode); 266 - return NULL; 267 - } 186 + de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); 187 + if (de) { 188 + pde_get(de); 189 + spin_unlock(&proc_subdir_lock); 190 + inode = proc_get_inode(dir->i_sb, de); 191 + if (!inode) 192 + return ERR_PTR(-ENOMEM); 193 + d_set_d_op(dentry, &simple_dentry_operations); 194 + d_add(dentry, inode); 195 + return NULL; 268 196 } 269 197 spin_unlock(&proc_subdir_lock); 270 198 return ERR_PTR(-ENOENT); ··· 291 225 return 0; 292 226 293 227 spin_lock(&proc_subdir_lock); 294 - de = de->subdir; 228 + de = pde_subdir_first(de); 295 229 i = ctx->pos - 2; 296 230 for (;;) { 297 231 if (!de) { ··· 300 234 } 301 235 if (!i) 302 236 break; 303 - de = de->next; 237 + de = pde_subdir_next(de); 304 238 i--; 305 239 } 306 240 ··· 315 249 } 316 250 spin_lock(&proc_subdir_lock); 317 251 ctx->pos++; 318 - next = de->next; 252 + next = pde_subdir_next(de); 319 253 pde_put(de); 320 254 de = next; 321 255 } while (de); ··· 352 286 353 287 
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 354 288 { 355 - struct proc_dir_entry *tmp; 356 289 int ret; 357 - 290 + 358 291 ret = proc_alloc_inum(&dp->low_ino); 359 292 if (ret) 360 293 return ret; ··· 373 308 } 374 309 375 310 spin_lock(&proc_subdir_lock); 376 - 377 - for (tmp = dir->subdir; tmp; tmp = tmp->next) 378 - if (strcmp(tmp->name, dp->name) == 0) { 379 - WARN(1, "proc_dir_entry '%s/%s' already registered\n", 380 - dir->name, dp->name); 381 - break; 382 - } 383 - 384 - dp->next = dir->subdir; 385 311 dp->parent = dir; 386 - dir->subdir = dp; 312 + if (pde_subdir_insert(dir, dp) == false) 313 + WARN(1, "proc_dir_entry '%s/%s' already registered\n", 314 + dir->name, dp->name); 387 315 spin_unlock(&proc_subdir_lock); 388 316 389 317 return 0; ··· 412 354 ent->namelen = qstr.len; 413 355 ent->mode = mode; 414 356 ent->nlink = nlink; 357 + ent->subdir = RB_ROOT; 415 358 atomic_set(&ent->count, 1); 416 359 spin_lock_init(&ent->pde_unload_lock); 417 360 INIT_LIST_HEAD(&ent->pde_openers); ··· 544 485 */ 545 486 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 546 487 { 547 - struct proc_dir_entry **p; 548 488 struct proc_dir_entry *de = NULL; 549 489 const char *fn = name; 550 490 unsigned int len; ··· 555 497 } 556 498 len = strlen(fn); 557 499 558 - for (p = &parent->subdir; *p; p=&(*p)->next ) { 559 - if (proc_match(len, fn, *p)) { 560 - de = *p; 561 - *p = de->next; 562 - de->next = NULL; 563 - break; 564 - } 565 - } 500 + de = pde_subdir_find(parent, fn, len); 501 + if (de) 502 + rb_erase(&de->subdir_node, &parent->subdir); 566 503 spin_unlock(&proc_subdir_lock); 567 504 if (!de) { 568 505 WARN(1, "name '%s'\n", name); ··· 569 516 if (S_ISDIR(de->mode)) 570 517 parent->nlink--; 571 518 de->nlink = 0; 572 - WARN(de->subdir, "%s: removing non-empty directory " 573 - "'%s/%s', leaking at least '%s'\n", __func__, 574 - de->parent->name, de->name, de->subdir->name); 519 + WARN(pde_subdir_first(de), 520 
+ "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", 521 + __func__, de->parent->name, de->name, pde_subdir_first(de)->name); 575 522 pde_put(de); 576 523 } 577 524 EXPORT_SYMBOL(remove_proc_entry); 578 525 579 526 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) 580 527 { 581 - struct proc_dir_entry **p; 582 528 struct proc_dir_entry *root = NULL, *de, *next; 583 529 const char *fn = name; 584 530 unsigned int len; ··· 589 537 } 590 538 len = strlen(fn); 591 539 592 - for (p = &parent->subdir; *p; p=&(*p)->next ) { 593 - if (proc_match(len, fn, *p)) { 594 - root = *p; 595 - *p = root->next; 596 - root->next = NULL; 597 - break; 598 - } 599 - } 540 + root = pde_subdir_find(parent, fn, len); 600 541 if (!root) { 601 542 spin_unlock(&proc_subdir_lock); 602 543 return -ENOENT; 603 544 } 545 + rb_erase(&root->subdir_node, &parent->subdir); 546 + 604 547 de = root; 605 548 while (1) { 606 - next = de->subdir; 549 + next = pde_subdir_first(de); 607 550 if (next) { 608 - de->subdir = next->next; 609 - next->next = NULL; 551 + rb_erase(&next->subdir_node, &de->subdir); 610 552 de = next; 611 553 continue; 612 554 }
+6 -5
fs/proc/internal.h
··· 24 24 * tree) of these proc_dir_entries, so that we can dynamically 25 25 * add new files to /proc. 26 26 * 27 - * The "next" pointer creates a linked list of one /proc directory, 28 - * while parent/subdir create the directory structure (every 29 - * /proc file has a parent, but "subdir" is NULL for all 30 - * non-directory entries). 27 + * parent/subdir are used for the directory structure (every /proc file has a 28 + * parent, but "subdir" is empty for all non-directory entries). 29 + * subdir_node is used to build the rb tree "subdir" of the parent. 31 30 */ 32 31 struct proc_dir_entry { 33 32 unsigned int low_ino; ··· 37 38 loff_t size; 38 39 const struct inode_operations *proc_iops; 39 40 const struct file_operations *proc_fops; 40 - struct proc_dir_entry *next, *parent, *subdir; 41 + struct proc_dir_entry *parent; 42 + struct rb_root subdir; 43 + struct rb_node subdir_node; 41 44 void *data; 42 45 atomic_t count; /* use count */ 43 46 atomic_t in_use; /* number of callers into module in progress; */
+1
fs/proc/proc_net.c
··· 192 192 if (!netd) 193 193 goto out; 194 194 195 + netd->subdir = RB_ROOT; 195 196 netd->data = net; 196 197 netd->nlink = 2; 197 198 netd->namelen = 3;
+1
fs/proc/root.c
··· 251 251 .proc_iops = &proc_root_inode_operations, 252 252 .proc_fops = &proc_root_operations, 253 253 .parent = &proc_root, 254 + .subdir = RB_ROOT, 254 255 .name = "/proc", 255 256 }; 256 257