/* fs/dcache.c — from Linux v2.6.18-rc7 (1798 lines, 47 kB, raw view) */
/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>


/*
 * Sysctl knob (vm.vfs_cache_pressure): percentage scaling how aggressively
 * shrink_dcache_memory() reports reclaimable dentries to the VM.
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

/* Global lock protecting the dcache topology, LRU and statistics. */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
/* Seqlock taken by rename; lets lockless lookups detect concurrent renames. */
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(dcache_lock);

static kmem_cache_t *dentry_cache __read_mostly;

/* Number of name bytes that fit inline in struct dentry (d_iname). */
#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS d_hash_shift
#define D_HASHMASK d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
/* LRU list of dentries with zero refcount; protected by dcache_lock. */
static LIST_HEAD(dentry_unused);

/* Statistics gathering.
 */
struct dentry_stat_t dentry_stat = {
        .age_limit = 45,
};

/*
 * RCU callback: actually free the dentry once all lockless lookups that
 * might still see it have completed. Frees an external name, if any.
 */
static void d_callback(struct rcu_head *head)
{
        struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);

        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry);
}

/*
 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
static void d_free(struct dentry *dentry)
{
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
        /* Defer the actual free until after the RCU grace period. */
        call_rcu(&dentry->d_u.d_rcu, d_callback);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 * Called with dcache_lock and per dentry lock held, drops both.
 */
static void dentry_iput(struct dentry * dentry)
{
        struct inode *inode = dentry->d_inode;
        if (inode) {
                dentry->d_inode = NULL;
                list_del_init(&dentry->d_alias);
                /* Drop both locks before calling out: iput() may sleep. */
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
                if (!inode->i_nlink)
                        fsnotify_inoderemove(inode);
                if (dentry->d_op && dentry->d_op->d_iput)
                        dentry->d_op->d_iput(dentry, inode);
                else
                        iput(inode);
        } else {
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
        }
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
        if (!dentry)
                return;

repeat:
        if (atomic_read(&dentry->d_count) == 1)
                might_sleep();
        /* Only take dcache_lock when the count actually drops to zero. */
        if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
                return;

        spin_lock(&dentry->d_lock);
        if (atomic_read(&dentry->d_count)) {
                /* Someone re-grabbed a reference before we got d_lock. */
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
                return;
        }

        /*
         * AV: ->d_delete() is _NOT_ allowed to block now.
         */
        if (dentry->d_op && dentry->d_op->d_delete) {
                if (dentry->d_op->d_delete(dentry))
                        goto unhash_it;
        }
        /* Unreachable? Get rid of it */
        if (d_unhashed(dentry))
                goto kill_it;
        /* Still hashed: park it on the LRU instead of freeing. */
        if (list_empty(&dentry->d_lru)) {
                dentry->d_flags |= DCACHE_REFERENCED;
                list_add(&dentry->d_lru, &dentry_unused);
                dentry_stat.nr_unused++;
        }
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
        return;

unhash_it:
        __d_drop(dentry);

kill_it: {
                struct dentry *parent;

                /* If dentry was on d_lru list
                 * delete it from there
                 */
                if (!list_empty(&dentry->d_lru)) {
                        list_del(&dentry->d_lru);
                        dentry_stat.nr_unused--;
                }
                list_del(&dentry->d_u.d_child);
                dentry_stat.nr_dentry--;        /* For d_free, below */
                /* drops the locks, at that point nobody can reach this dentry */
                dentry_iput(dentry);
                parent = dentry->d_parent;
                d_free(dentry);
                if (dentry == parent)
                        return;
                /* Hand-rolled tail recursion: release the parent next. */
                dentry = parent;
                goto repeat;
        }
}

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the
 * dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
        /*
         * If it's already been dropped, return OK.
         */
        spin_lock(&dcache_lock);
        if (d_unhashed(dentry)) {
                spin_unlock(&dcache_lock);
                return 0;
        }
        /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
         */
        if (!list_empty(&dentry->d_subdirs)) {
                spin_unlock(&dcache_lock);
                shrink_dcache_parent(dentry);
                spin_lock(&dcache_lock);
        }

        /*
         * Somebody else still using it?
         *
         * If it's a directory, we can't drop it
         * for fear of somebody re-populating it
         * with children (even though dropping it
         * would make it unreachable from the root,
         * we might still populate it if it was a
         * working directory or similar).
         */
        spin_lock(&dentry->d_lock);
        if (atomic_read(&dentry->d_count) > 1) {
                if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
                        spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
                        return -EBUSY;
                }
        }

        __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
        return 0;
}

/* This should be called _only_ with dcache_lock held */

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
        atomic_inc(&dentry->d_count);
        /* A dentry with users must not sit on the unused (LRU) list. */
        if (!list_empty(&dentry->d_lru)) {
                dentry_stat.nr_unused--;
                list_del_init(&dentry->d_lru);
        }
        return dentry;
}

struct dentry * dget_locked(struct dentry *dentry)
{
        return __dget_locked(dentry);
}

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has a DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return a DCACHE_DISCONNECTED alias.
 */

static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
        struct list_head *head, *next, *tmp;
        struct dentry *alias, *discon_alias=NULL;

        head = &inode->i_dentry;
        next = inode->i_dentry.next;
        while (next != head) {
                tmp = next;
                next = tmp->next;
                prefetch(next);
                alias = list_entry(tmp, struct dentry, d_alias);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
                        if (alias->d_flags & DCACHE_DISCONNECTED)
                                /* Remember it, but keep looking for a
                                 * connected alias to prefer. */
                                discon_alias = alias;
                        else if (!want_discon) {
                                __dget_locked(alias);
                                return alias;
                        }
                }
        }
        if (discon_alias)
                __dget_locked(discon_alias);
        return discon_alias;
}

struct dentry * d_find_alias(struct inode *inode)
{
        struct dentry *de = NULL;

        /* Cheap unlocked emptiness check first; rechecked under the lock
         * by __d_find_alias's traversal. */
        if (!list_empty(&inode->i_dentry)) {
                spin_lock(&dcache_lock);
                de = __d_find_alias(inode, 0);
                spin_unlock(&dcache_lock);
        }
        return de;
}

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
        struct dentry *dentry;
restart:
        spin_lock(&dcache_lock);
        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
                spin_lock(&dentry->d_lock);
                if (!atomic_read(&dentry->d_count)) {
                        /* Pin it, unhash it, then drop our pin outside the
                         * locks; dput() may free it, so rescan the list. */
                        __dget_locked(dentry);
                        __d_drop(dentry);
                        spin_unlock(&dentry->d_lock);
                        spin_unlock(&dcache_lock);
                        dput(dentry);
                        goto restart;
                }
                spin_unlock(&dentry->d_lock);
        }
        spin_unlock(&dcache_lock);
}

/*
 * Throw away a dentry - free the inode, dput the parent. This requires that
 * the LRU list has already been removed.
 *
 * Called with dcache_lock, drops it and then regains.
 * Called with dentry->d_lock held, drops it.
 */
static void prune_one_dentry(struct dentry * dentry)
{
        struct dentry * parent;

        __d_drop(dentry);
        list_del(&dentry->d_u.d_child);
        dentry_stat.nr_dentry--;        /* For d_free, below */
        /* dentry_iput() drops dcache_lock and dentry->d_lock. */
        dentry_iput(dentry);
        parent = dentry->d_parent;
        d_free(dentry);
        if (parent != dentry)
                dput(parent);
        spin_lock(&dcache_lock);
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 * @sb: if given, ignore dentries for other superblocks
 *      which are being unmounted.
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */

static void prune_dcache(int count, struct super_block *sb)
{
        spin_lock(&dcache_lock);
        for (; count ; count--) {
                struct dentry *dentry;
                struct list_head *tmp;
                struct rw_semaphore *s_umount;

                cond_resched_lock(&dcache_lock);

                tmp = dentry_unused.prev;
                if (sb) {
                        /* Try to find a dentry for this sb, but don't try
                         * too hard, if they aren't near the tail they will
                         * be moved down again soon
                         */
                        int skip = count;
                        while (skip && tmp != &dentry_unused &&
                            list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
                                skip--;
                                tmp = tmp->prev;
                        }
                }
                if (tmp == &dentry_unused)
                        break;
                list_del_init(tmp);
                prefetch(dentry_unused.prev);
                dentry_stat.nr_unused--;
                dentry = list_entry(tmp, struct dentry, d_lru);

                spin_lock(&dentry->d_lock);
                /*
                 * We found an inuse dentry which was not removed from
                 * dentry_unused because of laziness during lookup. Do not free
                 * it - just keep it off the dentry_unused list.
                 */
                if (atomic_read(&dentry->d_count)) {
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /* If the dentry was recently referenced, don't free it. */
                if (dentry->d_flags & DCACHE_REFERENCED) {
                        dentry->d_flags &= ~DCACHE_REFERENCED;
                        list_add(&dentry->d_lru, &dentry_unused);
                        dentry_stat.nr_unused++;
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /*
                 * If the dentry is not DCACHE_REFERENCED, it is time
                 * to remove it from the dcache, provided the super block is
                 * NULL (which means we are trying to reclaim memory)
                 * or this dentry belongs to the same super block that
                 * we want to shrink.
                 */
                /*
                 * If this dentry is for "my" filesystem, then I can prune it
                 * without taking the s_umount lock (I already hold it).
                 */
                if (sb && dentry->d_sb == sb) {
                        prune_one_dentry(dentry);
                        continue;
                }
                /*
                 * ...otherwise we need to be sure this filesystem isn't being
                 * unmounted, otherwise we could race with
                 * generic_shutdown_super(), and end up holding a reference to
                 * an inode while the filesystem is unmounted.
                 * So we try to get s_umount, and make sure s_root isn't NULL.
                 * (Take a local copy of s_umount to avoid a use-after-free of
                 * `dentry').
                 */
                s_umount = &dentry->d_sb->s_umount;
                if (down_read_trylock(s_umount)) {
                        if (dentry->d_sb->s_root != NULL) {
                                prune_one_dentry(dentry);
                                up_read(s_umount);
                                continue;
                        }
                        up_read(s_umount);
                }
                spin_unlock(&dentry->d_lock);
                /* Cannot remove the first dentry, and it isn't appropriate
                 * to move it to the head of the list, so give up, and try
                 * later
                 */
                break;
        }
        spin_unlock(&dcache_lock);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list. On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them. The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */

void shrink_dcache_sb(struct super_block * sb)
{
        struct list_head *tmp, *next;
        struct dentry *dentry;

        /*
         * Pass one ... move the dentries for the specified
         * superblock to the most recent end of the unused list.
         */
        spin_lock(&dcache_lock);
        list_for_each_safe(tmp, next, &dentry_unused) {
                dentry = list_entry(tmp, struct dentry, d_lru);
                if (dentry->d_sb != sb)
                        continue;
                list_move(tmp, &dentry_unused);
        }

        /*
         * Pass two ... free the dentries for this superblock.
         */
repeat:
        list_for_each_safe(tmp, next, &dentry_unused) {
                dentry = list_entry(tmp, struct dentry, d_lru);
                if (dentry->d_sb != sb)
                        continue;
                dentry_stat.nr_unused--;
                list_del_init(tmp);
                spin_lock(&dentry->d_lock);
                if (atomic_read(&dentry->d_count)) {
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
                /* prune_one_dentry drops and retakes dcache_lock, so the
                 * list may have changed: restart the traversal. */
                prune_one_dentry(dentry);
                cond_resched_lock(&dcache_lock);
                goto repeat;
        }
        spin_unlock(&dcache_lock);
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

int have_submounts(struct dentry *parent)
{
        struct dentry *this_parent = parent;
        struct list_head *next;

        spin_lock(&dcache_lock);
        if (d_mountpoint(parent))
                goto positive;
repeat:
        next = this_parent->d_subdirs.next;
resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                /* Have we found a mount point ? */
                if (d_mountpoint(dentry))
                        goto positive;
                if (!list_empty(&dentry->d_subdirs)) {
                        this_parent = dentry;
                        goto repeat;
                }
        }
        /*
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
        spin_unlock(&dcache_lock);
        return 0; /* No mount points found in tree */
positive:
        spin_unlock(&dcache_lock);
        return 1;
}

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
        struct dentry *this_parent = parent;
        struct list_head *next;
        int found = 0;

        spin_lock(&dcache_lock);
repeat:
        next = this_parent->d_subdirs.next;
resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;

                if (!list_empty(&dentry->d_lru)) {
                        dentry_stat.nr_unused--;
                        list_del_init(&dentry->d_lru);
                }
                /*
                 * move only zero ref count dentries to the end
                 * of the unused list for prune_dcache
                 */
                if (!atomic_read(&dentry->d_count)) {
                        list_add_tail(&dentry->d_lru, &dentry_unused);
                        dentry_stat.nr_unused++;
                        found++;
                }

                /*
                 * We can return to the caller if we have found some (this
                 * ensures forward progress). We'll be coming back to find
                 * the rest.
                 */
                if (found && need_resched())
                        goto out;

                /*
                 * Descend a level if the d_subdirs list is non-empty.
                 */
                if (!list_empty(&dentry->d_subdirs)) {
                        this_parent = dentry;
                        goto repeat;
                }
        }
        /*
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
                next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
out:
        spin_unlock(&dcache_lock);
        return found;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
        int found;

        while ((found = select_parent(parent)) != 0)
                prune_dcache(found, parent->d_sb);
}

/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
                prune_dcache(nr, NULL);
        }
        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
        struct dentry *dentry;
        char *dname;

        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
        if (!dentry)
                return NULL;

        /* Long names need a separate allocation; short ones are stored
         * inline in d_iname. */
        if (name->len > DNAME_INLINE_LEN-1) {
                dname = kmalloc(name->len + 1, GFP_KERNEL);
                if (!dname) {
                        kmem_cache_free(dentry_cache, dentry);
                        return NULL;
                }
        } else {
                dname = dentry->d_iname;
        }
        dentry->d_name.name = dname;

        dentry->d_name.len = name->len;
        dentry->d_name.hash = name->hash;
        memcpy(dname, name->name, name->len);
        dname[name->len] = 0;

        atomic_set(&dentry->d_count, 1);
        dentry->d_flags = DCACHE_UNHASHED;
        spin_lock_init(&dentry->d_lock);
        dentry->d_inode = NULL;
        dentry->d_parent = NULL;
        dentry->d_sb = NULL;
        dentry->d_op = NULL;
        dentry->d_fsdata = NULL;
        dentry->d_mounted = 0;
#ifdef CONFIG_PROFILING
        dentry->d_cookie = NULL;
#endif
        INIT_HLIST_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
        INIT_LIST_HEAD(&dentry->d_alias);

        if (parent) {
                dentry->d_parent = dget(parent);
                dentry->d_sb = parent->d_sb;
        } else {
                INIT_LIST_HEAD(&dentry->d_u.d_child);
        }

        spin_lock(&dcache_lock);
        if (parent)
                list_add(&dentry->d_u.d_child, &parent->d_subdirs);
        dentry_stat.nr_dentry++;
        spin_unlock(&dcache_lock);

        return dentry;
}

/* Convenience wrapper: build the qstr (with default hash) from a C string. */
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
        struct qstr q;

        q.name = name;
        q.len = strlen(name);
        q.hash = full_name_hash(q.name, q.len);
        return d_alloc(parent, &q);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
        /* inode may be NULL: that leaves a negative dentry. */
        if (inode)
                list_add(&entry->d_alias, &inode->i_dentry);
        entry->d_inode = inode;
        fsnotify_d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        security_d_instantiate(entry, inode);
}

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
        struct dentry *alias;
        int len = entry->d_name.len;
        const char *name = entry->d_name.name;
        unsigned int hash = entry->d_name.hash;

        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
        if (!inode)
                goto do_negative;
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
                struct qstr *qstr = &alias->d_name;

                /* Cheapest checks first: hash, parent, length, then bytes. */
                if (qstr->hash != hash)
                        continue;
                if (alias->d_parent != entry->d_parent)
                        continue;
                if (qstr->len != len)
                        continue;
                if (memcmp(qstr->name, name, len))
                        continue;
                dget_locked(alias);
                spin_unlock(&dcache_lock);
                BUG_ON(!d_unhashed(alias));
                /* Caller's inode reference is consumed by the alias. */
                iput(inode);
                return alias;
        }
        list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
        entry->d_inode = inode;
        fsnotify_d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        security_d_instantiate(entry, inode);
        return NULL;
}
EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
        struct dentry *res = NULL;

        if (root_inode) {
                static const struct qstr name = { .name = "/", .len = 1 };

                res = d_alloc(NULL, &name);
                if (res) {
                        res->d_sb = root_inode->i_sb;
                        /* The root is its own parent. */
                        res->d_parent = res;
                        d_instantiate(res, root_inode);
                }
        }
        return res;
}

/* Map a (parent pointer, name hash) pair to a dentry hash chain head. */
static inline struct hlist_head *d_hash(struct dentry *parent,
                                        unsigned long hash)
{
        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
        return dentry_hashtable + (hash & D_HASHMASK);
}

/**
 * d_alloc_anon - allocate an anonymous dentry
 * @inode: inode to allocate the dentry for
 *
 * This is similar to d_alloc_root. It is used by filesystems when
 * creating a dentry for a given inode, often in the process of
 * mapping a filehandle to a dentry. The returned dentry may be
 * anonymous, or may have a full name (if the inode was already
 * in the cache). The file system may need to make further
 * efforts to connect this dentry into the dcache properly.
 *
 * When called on a directory inode, we must ensure that
 * the inode only ever has one dentry. If a dentry is
 * found, that is returned instead of allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. If %NULL is returned (indicating kmalloc failure),
 * the reference on the inode has not been released.
 */

struct dentry * d_alloc_anon(struct inode *inode)
{
        static const struct qstr anonstring = { .name = "" };
        struct dentry *tmp;
        struct dentry *res;

        if ((res = d_find_alias(inode))) {
                iput(inode);
                return res;
        }

        /* Allocate the new dentry before taking dcache_lock. */
        tmp = d_alloc(NULL, &anonstring);
        if (!tmp)
                return NULL;

        tmp->d_parent = tmp; /* make sure dput doesn't croak */

        spin_lock(&dcache_lock);
        /* Re-check under the lock: someone may have added an alias since
         * the d_find_alias() above. */
        res = __d_find_alias(inode, 0);
        if (!res) {
                /* attach a disconnected dentry */
                res = tmp;
                tmp = NULL;
                spin_lock(&res->d_lock);
                res->d_sb = inode->i_sb;
                res->d_parent = res;
                res->d_inode = inode;
                res->d_flags |= DCACHE_DISCONNECTED;
                res->d_flags &= ~DCACHE_UNHASHED;
                list_add(&res->d_alias, &inode->i_dentry);
                hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
                spin_unlock(&res->d_lock);

                inode = NULL; /* don't drop reference */
        }
        spin_unlock(&dcache_lock);

        if (inode)
                iput(inode);
        if (tmp)
                dput(tmp);
        return res;
}


/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode: the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
        struct dentry *new = NULL;

        if (inode) {
                spin_lock(&dcache_lock);
                new = __d_find_alias(inode, 1);
                if (new) {
                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
                        fsnotify_d_instantiate(new, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(new, inode);
                        d_rehash(dentry);
                        /* Move the disconnected alias into dentry's place. */
                        d_move(new, dentry);
                        iput(inode);
                } else {
                        /* d_instantiate takes dcache_lock, so we do it by hand */
                        list_add(&dentry->d_alias, &inode->i_dentry);
                        dentry->d_inode = inode;
                        fsnotify_d_instantiate(dentry, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(dentry, inode);
                        d_rehash(dentry);
                }
        } else
                d_add(dentry, inode);
        return new;
}


/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use dput() to free the entry when it has
 * finished using it. %NULL is returned on failure.
 *
 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
 * Memory barriers are used while updating and doing lockless traversal.
 * To avoid races with d_move while rename is happening, d_lock is used.
 *
 * Overflows in memcmp(), while d_move, are avoided by keeping the length
 * and name pointer in one structure pointed by d_qstr.
 *
 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
 * lookup is going on.
 *
 * dentry_unused list is not updated even if lookup finds the required dentry
 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
 * acquisition.
 *
 * d_lookup() is protected against the concurrent renames in some unrelated
 * directory using the seqlock_t rename_lock.
 */

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
        struct dentry * dentry = NULL;
        unsigned long seq;

        /* Retry the lockless lookup if a rename raced with us. */
        do {
                seq = read_seqbegin(&rename_lock);
                dentry = __d_lookup(parent, name);
                if (dentry)
                        break;
        } while (read_seqretry(&rename_lock, seq));
        return dentry;
}

struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
        struct hlist_head *head = d_hash(parent,hash);
        struct dentry *found = NULL;
        struct hlist_node *node;
        struct dentry *dentry;

        rcu_read_lock();

        hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
                struct qstr *qstr;

                if (dentry->d_name.hash != hash)
                        continue;
                if (dentry->d_parent != parent)
                        continue;

                spin_lock(&dentry->d_lock);

                /*
                 * Recheck the dentry after taking the lock - d_move may have
                 * changed things. Don't bother checking the hash because we're
                 * about to compare the whole name anyway.
                 */
                if (dentry->d_parent != parent)
                        goto next;

                /*
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
                qstr = &dentry->d_name;
                if (parent->d_op && parent->d_op->d_compare) {
                        if (parent->d_op->d_compare(parent, qstr, name))
                                goto next;
                } else {
                        if (qstr->len != len)
                                goto next;
                        if (memcmp(qstr->name, str, len))
                                goto next;
                }

                /* A dentry unhashed under us must not be returned. */
                if (!d_unhashed(dentry)) {
                        atomic_inc(&dentry->d_count);
                        found = dentry;
                }
                spin_unlock(&dentry->d_lock);
                break;
next:
                spin_unlock(&dentry->d_lock);
        }
        rcu_read_unlock();

        return found;
}

/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On hash failure or on lookup failure NULL is returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
        struct dentry *dentry = NULL;

        /*
         * Check for a fs-specific hash function. Note that we must
         * calculate the standard hash first, as the d_op->d_hash()
         * routine may choose to leave the hash value unchanged.
         */
        name->hash = full_name_hash(name->name, name->len);
        if (dir->d_op && dir->d_op->d_hash) {
                if (dir->d_op->d_hash(dir, name) < 0)
                        goto out;
        }
        dentry = d_lookup(dir, name);
out:
        return dentry;
}

/**
 * d_validate - verify dentry provided from insecure source
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 */

int d_validate(struct dentry *dentry, struct dentry *dparent)
{
        struct hlist_head *base;
        struct hlist_node *lhp;

        /* Check whether the ptr might be valid at all.. */
        if (!kmem_ptr_validate(dentry_cache, dentry))
                goto out;

        if (dentry->d_parent != dparent)
                goto out;

        spin_lock(&dcache_lock);
        base = d_hash(dparent, dentry->d_name.hash);
        hlist_for_each(lhp,base) {
                /* hlist_for_each_entry_rcu() not required for d_hash list
                 * as it is parsed under dcache_lock
                 */
                if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
                        __dget_locked(dentry);
                        spin_unlock(&dcache_lock);
                        return 1;
                }
        }
        spin_unlock(&dcache_lock);
out:
        return 0;
}

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */

void d_delete(struct dentry * dentry)
{
        int isdir = 0;
        /*
         * Are we the only user?
         */
        spin_lock(&dcache_lock);
        spin_lock(&dentry->d_lock);
        isdir = S_ISDIR(dentry->d_inode->i_mode);
        if (atomic_read(&dentry->d_count) == 1) {
                /* dentry_iput() drops both locks taken above. */
                dentry_iput(dentry);
                fsnotify_nameremove(dentry, isdir);

                /* remove this and other inotify debug checks after 2.6.18 */
                /* NOTE(review): dentry_iput() already dropped d_lock, so this
                 * flag update runs unlocked - confirm it is debug-only. */
                dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
                return;
        }

        if (!d_unhashed(dentry))
                __d_drop(dentry);

        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);

        fsnotify_nameremove(dentry, isdir);
}

static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{

        entry->d_flags &= ~DCACHE_UNHASHED;
        hlist_add_head_rcu(&entry->d_hash, list);
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */

void d_rehash(struct dentry * entry)
{
        struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);

        spin_lock(&dcache_lock);
        spin_lock(&entry->d_lock);
        __d_rehash(entry, list);
        spin_unlock(&entry->d_lock);
        spin_unlock(&dcache_lock);
}

/* Swap two lvalues of the same type (used by the name-switching code). */
#define do_switch(x,y) do { \
        __typeof__ (x) __tmp = x; \
        x = y; y = __tmp; } while (0)

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
1270 */ 1271static void switch_names(struct dentry *dentry, struct dentry *target) 1272{ 1273 if (dname_external(target)) { 1274 if (dname_external(dentry)) { 1275 /* 1276 * Both external: swap the pointers 1277 */ 1278 do_switch(target->d_name.name, dentry->d_name.name); 1279 } else { 1280 /* 1281 * dentry:internal, target:external. Steal target's 1282 * storage and make target internal. 1283 */ 1284 dentry->d_name.name = target->d_name.name; 1285 target->d_name.name = target->d_iname; 1286 } 1287 } else { 1288 if (dname_external(dentry)) { 1289 /* 1290 * dentry:external, target:internal. Give dentry's 1291 * storage to target and make dentry internal 1292 */ 1293 memcpy(dentry->d_iname, target->d_name.name, 1294 target->d_name.len + 1); 1295 target->d_name.name = dentry->d_name.name; 1296 dentry->d_name.name = dentry->d_iname; 1297 } else { 1298 /* 1299 * Both are internal. Just copy target to dentry 1300 */ 1301 memcpy(dentry->d_iname, target->d_name.name, 1302 target->d_name.len + 1); 1303 } 1304 } 1305} 1306 1307/* 1308 * We cannibalize "target" when moving dentry on top of it, 1309 * because it's going to be thrown away anyway. We could be more 1310 * polite about it, though. 1311 * 1312 * This forceful removal will result in ugly /proc output if 1313 * somebody holds a file open that got deleted due to a rename. 1314 * We could be nicer about the deleted file, and let it show 1315 * up under the name it got deleted rather than the name that 1316 * deleted it. 1317 */ 1318 1319/** 1320 * d_move - move a dentry 1321 * @dentry: entry to move 1322 * @target: new dentry 1323 * 1324 * Update the dcache to reflect the move of a file name. Negative 1325 * dcache entries should not be moved in this way. 
 */

void d_move(struct dentry * dentry, struct dentry * target)
{
	struct hlist_head *list;

	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	spin_lock(&dcache_lock);
	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 *
	 * Take both d_locks in address order so two concurrent moves of
	 * the same pair cannot deadlock; the inner lock uses the NESTED
	 * annotation for lockdep.
	 */
	if (target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	if (dentry->d_flags & DCACHE_UNHASHED)
		goto already_unhashed;

	hlist_del_rcu(&dentry->d_hash);

already_unhashed:
	list = d_hash(target->d_parent, target->d_name.hash);
	__d_rehash(dentry, list);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		/* dentry was a root: target becomes a self-parented root. */
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		do_switch(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
	spin_unlock(&dcache_lock);
}

/**
 * d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root dentry
 * @rootmnt: vfsmnt to which the root dentry belongs
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns the buffer or an error code if the path was too long.
 *
 * "buflen" should be positive. Caller holds the dcache_lock.
 */
static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
			struct dentry *root, struct vfsmount *rootmnt,
			char *buffer, int buflen)
{
	char * end = buffer+buflen;
	char * retval;
	int namelen;

	/* The path is assembled backwards from the end of the buffer. */
	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
		buflen -= 10;
		end -= 10;
		if (buflen < 0)
			goto Elong;
		memcpy(end, " (deleted)", 10);
	}

	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry * parent;

		if (dentry == root && vfsmnt == rootmnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root?  If not, cross up into the mount
			 * this one is stacked on and keep walking. */
			spin_lock(&vfsmount_lock);
			if (vfsmnt->mnt_parent == vfsmnt) {
				spin_unlock(&vfsmount_lock);
				goto global_root;
			}
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			spin_unlock(&vfsmount_lock);
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			goto Elong;
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	/* Reached a root outside @root/@rootmnt: overwrite the leading
	 * slash with that root dentry's own name. */
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen < 0)
		goto Elong;
	retval -= namelen-1;	/* hit the slash */
	memcpy(retval, dentry->d_name.name, namelen);
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

/* write full pathname into buffer and return start of pathname */
char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		char *buf, int buflen)
{
	char *res;
	struct vfsmount *rootmnt;
	struct dentry *root;

	/* Pin the process root dentry/mount so they cannot go away
	 * while __d_path() walks up towards them. */
	read_lock(&current->fs->lock);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);
	spin_lock(&dcache_lock);
	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
	spin_unlock(&dcache_lock);
	dput(root);
	mntput(rootmnt);
	return res;
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value.
 * So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
	int error;
	struct vfsmount *pwdmnt, *rootmnt;
	struct dentry *pwd, *root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	/* Pin cwd and root so neither can be freed while we build the path. */
	read_lock(&current->fs->lock);
	pwdmnt = mntget(current->fs->pwdmnt);
	pwd = dget(current->fs->pwd);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);

	error = -ENOENT;
	/* Has the current directory been unlinked? */
	spin_lock(&dcache_lock);
	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
		unsigned long len;
		char * cwd;

		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
		spin_unlock(&dcache_lock);

		error = PTR_ERR(cwd);
		if (IS_ERR(cwd))
			goto out;

		/* __d_path() built the string at the tail end of the page;
		 * the length includes the terminating NUL. */
		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else
		spin_unlock(&dcache_lock);

out:
	dput(pwd);
	mntput(pwdmnt);
	dput(root);
	mntput(rootmnt);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */
  
int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
{
	int result;
	struct dentry * saved = new_dentry;
	unsigned long seq;

	/* need rcu_readlock to protect against the d_parent trashing
	 * due to d_move
	 */
	rcu_read_lock();
	do {
		/* for restarting inner loop in case of seq retry */
		new_dentry = saved;
		result = 0;
		seq = read_seqbegin(&rename_lock);
		/* Walk d_parent pointers until we hit old_dentry (found)
		 * or a self-parented root (not found). */
		for (;;) {
			if (new_dentry != old_dentry) {
				struct dentry * parent = new_dentry->d_parent;
				if (parent == new_dentry)
					break;
				new_dentry = parent;
				continue;
			}
			result = 1;
			break;
		}
	} while (read_seqretry(&rename_lock, seq));
	rcu_read_unlock();

	return result;
}

/*
 * d_genocide - drop one reference from every hashed, positive dentry in
 * the subtree rooted at @root (the root itself excluded).  Walks the
 * tree iteratively via d_subdirs/d_u.d_child under dcache_lock.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Skip unhashed and negative entries. */
		if (d_unhashed(dentry)||!dentry->d_inode)
			continue;
		if (!list_empty(&dentry->d_subdirs)) {
			/* Descend first; this child is handled on the way
			 * back up (see the ascent below). */
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	if (this_parent != root) {
		/* Ascend: drop the ref of the directory we just finished
		 * and continue with its siblings. */
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */
 
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}

/* "dhash_entries=" boot parameter: requested dentry hash table size
 * (0 = let alloc_large_system_hash() pick a size from memory). */
static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

/*
 * Early (bootmem) allocation of the dentry hash table, used unless
 * hashdist requests NUMA-distributed allocation later in dcache_init().
 */
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

/*
 * Set up the dentry slab cache, register the dcache shrinker, and
 * allocate the hash table if dcache_init_early() deferred it.
 */
static void __init dcache_init(unsigned long mempages)
{
	int loop;

	/* 
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache. 
	 */
	dentry_cache = kmem_cache_create("dentry_cache",
					 sizeof(struct dentry),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 NULL, NULL);
	
	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

/* SLAB cache for __getname() consumers */
kmem_cache_t *names_cachep __read_mostly;

/* SLAB cache for file structures */
kmem_cache_t *filp_cachep __read_mostly;

EXPORT_SYMBOL(d_genocide);

extern void bdev_cache_init(void);
extern void chrdev_init(void);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

/*
 * Initialise all the VFS caches.  @mempages is total system pages;
 * a reserve is subtracted before sizing the hash tables.
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
           150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	dcache_init(mempages);
	inode_init(mempages);
	files_init(mempages);
	mnt_init(mempages);
	bdev_cache_init();
	chrdev_init();
}

EXPORT_SYMBOL(d_alloc);
EXPORT_SYMBOL(d_alloc_anon);
EXPORT_SYMBOL(d_alloc_root);
EXPORT_SYMBOL(d_delete);
EXPORT_SYMBOL(d_find_alias);
EXPORT_SYMBOL(d_instantiate);
EXPORT_SYMBOL(d_invalidate);
1785EXPORT_SYMBOL(d_lookup); 1786EXPORT_SYMBOL(d_move); 1787EXPORT_SYMBOL(d_path); 1788EXPORT_SYMBOL(d_prune_aliases); 1789EXPORT_SYMBOL(d_rehash); 1790EXPORT_SYMBOL(d_splice_alias); 1791EXPORT_SYMBOL(d_validate); 1792EXPORT_SYMBOL(dget_locked); 1793EXPORT_SYMBOL(dput); 1794EXPORT_SYMBOL(find_inode_number); 1795EXPORT_SYMBOL(have_submounts); 1796EXPORT_SYMBOL(names_cachep); 1797EXPORT_SYMBOL(shrink_dcache_parent); 1798EXPORT_SYMBOL(shrink_dcache_sb);