fs: protect inode->i_state with inode->i_lock

Protect inode state transitions and validity checks with
inode->i_lock. This allows inode state transitions to be performed
independently of the inode_lock and is the first step towards peeling
the inode_lock away from the code.
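
The net rule, shown here as a minimal sketch rather than a hunk from
this patch (I_DIRTY_SYNC is just an example flag), is that any i_state
update now nests inode->i_lock inside the still-global inode_lock,
matching the lock ordering documented in fs/inode.c below:

        spin_lock(&inode_lock);
        spin_lock(&inode->i_lock);
        inode->i_state |= I_DIRTY_SYNC;   /* state transition under i_lock */
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_lock);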

This requires that __iget() be done atomically with the i_state
checks during list traversals, so that we don't race with another
thread marking the inode I_FREEING between the state check and
grabbing the reference.
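
Concretely, the s_inodes and hash-list walkers end up with the
following shape (a condensed sketch of what the patch does in
fs/drop_caches.c and similar sites, not a verbatim hunk):

        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                spin_lock(&inode->i_lock);
                if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
                        spin_unlock(&inode->i_lock);
                        continue;
                }
                __iget(inode);          /* reference taken before i_lock is dropped */
                spin_unlock(&inode->i_lock);
                /* ... drop inode_lock, use the inode, iput() it ... */
        }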

Also remove the unlock_new_inode() memory barrier optimisation that
was required to avoid taking the inode_lock when clearing I_NEW.
Simplify the code by taking inode->i_lock around the state change and
wakeup. Because the wakeup is no longer tricky, remove the
wake_up_inode() function and open code the wakeup where necessary.
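
With that, clearing I_NEW in unlock_new_inode() reduces to the plain
locked form used by the patch:

        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
        wake_up_bit(&inode->i_state, __I_NEW);
        spin_unlock(&inode->i_lock);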

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>


11 files changed, 176 insertions(+), 76 deletions(-)
fs/block_dev.c  +2
··· 56 56 struct backing_dev_info *dst) 57 57 { 58 58 spin_lock(&inode_lock); 59 + spin_lock(&inode->i_lock); 59 60 inode->i_data.backing_dev_info = dst; 60 61 if (inode->i_state & I_DIRTY) 61 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 63 + spin_unlock(&inode->i_lock); 62 64 spin_unlock(&inode_lock); 63 65 } 64 66
fs/buffer.c  +1 -1
··· 1144 1144 * inode list. 1145 1145 * 1146 1146 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1147 - * mapping->tree_lock and the global inode_lock. 1147 + * mapping->tree_lock and mapping->host->i_lock. 1148 1148 */ 1149 1149 void mark_buffer_dirty(struct buffer_head *bh) 1150 1150 {
fs/drop_caches.c  +6 -3
··· 18 18 19 19 spin_lock(&inode_lock); 20 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 21 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 21 + spin_lock(&inode->i_lock); 22 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 23 + (inode->i_mapping->nrpages == 0)) { 24 + spin_unlock(&inode->i_lock); 22 25 continue; 23 - if (inode->i_mapping->nrpages == 0) 24 - continue; 26 + } 25 27 __iget(inode); 28 + spin_unlock(&inode->i_lock); 26 29 spin_unlock(&inode_lock); 27 30 invalidate_mapping_pages(inode->i_mapping, 0, -1); 28 31 iput(toput_inode);
fs/fs-writeback.c  +34 -10
··· 306 306 wait_queue_head_t *wqh; 307 307 308 308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 309 - while (inode->i_state & I_SYNC) { 309 + while (inode->i_state & I_SYNC) { 310 + spin_unlock(&inode->i_lock); 310 311 spin_unlock(&inode_lock); 311 312 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 312 313 spin_lock(&inode_lock); 314 + spin_lock(&inode->i_lock); 313 315 } 314 316 } 315 317 ··· 335 333 unsigned dirty; 336 334 int ret; 337 335 336 + spin_lock(&inode->i_lock); 338 337 if (!atomic_read(&inode->i_count)) 339 338 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 340 339 else ··· 351 348 * completed a full scan of b_io. 352 349 */ 353 350 if (wbc->sync_mode != WB_SYNC_ALL) { 351 + spin_unlock(&inode->i_lock); 354 352 requeue_io(inode); 355 353 return 0; 356 354 } ··· 367 363 /* Set I_SYNC, reset I_DIRTY_PAGES */ 368 364 inode->i_state |= I_SYNC; 369 365 inode->i_state &= ~I_DIRTY_PAGES; 366 + spin_unlock(&inode->i_lock); 370 367 spin_unlock(&inode_lock); 371 368 372 369 ret = do_writepages(mapping, wbc); ··· 389 384 * write_inode() 390 385 */ 391 386 spin_lock(&inode_lock); 387 + spin_lock(&inode->i_lock); 392 388 dirty = inode->i_state & I_DIRTY; 393 389 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 390 + spin_unlock(&inode->i_lock); 394 391 spin_unlock(&inode_lock); 395 392 /* Don't write the inode if only I_DIRTY_PAGES was set */ 396 393 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { ··· 402 395 } 403 396 404 397 spin_lock(&inode_lock); 398 + spin_lock(&inode->i_lock); 405 399 inode->i_state &= ~I_SYNC; 406 400 if (!(inode->i_state & I_FREEING)) { 407 401 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ··· 444 436 } 445 437 } 446 438 inode_sync_complete(inode); 439 + spin_unlock(&inode->i_lock); 447 440 return ret; 448 441 } 449 442 ··· 515 506 * kind does not need peridic writeout yet, and for the latter 516 507 * kind writeout is handled by the freer. 517 508 */ 509 + spin_lock(&inode->i_lock); 518 510 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 511 + spin_unlock(&inode->i_lock); 519 512 requeue_io(inode); 520 513 continue; 521 514 } ··· 526 515 * Was this inode dirtied after sync_sb_inodes was called? 527 516 * This keeps sync from extra jobs and livelock. 528 517 */ 529 - if (inode_dirtied_after(inode, wbc->wb_start)) 518 + if (inode_dirtied_after(inode, wbc->wb_start)) { 519 + spin_unlock(&inode->i_lock); 530 520 return 1; 521 + } 531 522 532 523 __iget(inode); 524 + spin_unlock(&inode->i_lock); 525 + 533 526 pages_skipped = wbc->pages_skipped; 534 527 writeback_single_inode(inode, wbc); 535 528 if (wbc->pages_skipped != pages_skipped) { ··· 739 724 if (!list_empty(&wb->b_more_io)) { 740 725 inode = wb_inode(wb->b_more_io.prev); 741 726 trace_wbc_writeback_wait(&wbc, wb->bdi); 727 + spin_lock(&inode->i_lock); 742 728 inode_wait_for_writeback(inode); 729 + spin_unlock(&inode->i_lock); 743 730 } 744 731 spin_unlock(&inode_lock); 745 732 } ··· 1034 1017 block_dump___mark_inode_dirty(inode); 1035 1018 1036 1019 spin_lock(&inode_lock); 1020 + spin_lock(&inode->i_lock); 1037 1021 if ((inode->i_state & flags) != flags) { 1038 1022 const int was_dirty = inode->i_state & I_DIRTY; 1039 1023 ··· 1046 1028 * superblock list, based upon its state. 
1047 1029 */ 1048 1030 if (inode->i_state & I_SYNC) 1049 - goto out; 1031 + goto out_unlock_inode; 1050 1032 1051 1033 /* 1052 1034 * Only add valid (hashed) inodes to the superblock's ··· 1054 1036 */ 1055 1037 if (!S_ISBLK(inode->i_mode)) { 1056 1038 if (inode_unhashed(inode)) 1057 - goto out; 1039 + goto out_unlock_inode; 1058 1040 } 1059 1041 if (inode->i_state & I_FREEING) 1060 - goto out; 1042 + goto out_unlock_inode; 1061 1043 1044 + spin_unlock(&inode->i_lock); 1062 1045 /* 1063 1046 * If the inode was already on b_dirty/b_io/b_more_io, don't 1064 1047 * reposition it (that would break b_dirty time-ordering). ··· 1084 1065 inode->dirtied_when = jiffies; 1085 1066 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1086 1067 } 1068 + goto out; 1087 1069 } 1070 + out_unlock_inode: 1071 + spin_unlock(&inode->i_lock); 1088 1072 out: 1089 1073 spin_unlock(&inode_lock); 1090 1074 ··· 1133 1111 * we still have to wait for that writeout. 1134 1112 */ 1135 1113 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1136 - struct address_space *mapping; 1114 + struct address_space *mapping = inode->i_mapping; 1137 1115 1138 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 1116 + spin_lock(&inode->i_lock); 1117 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 1118 + (mapping->nrpages == 0)) { 1119 + spin_unlock(&inode->i_lock); 1139 1120 continue; 1140 - mapping = inode->i_mapping; 1141 - if (mapping->nrpages == 0) 1142 - continue; 1121 + } 1143 1122 __iget(inode); 1123 + spin_unlock(&inode->i_lock); 1144 1124 spin_unlock(&inode_lock); 1145 1125 /* 1146 1126 * We hold a reference to 'inode' so it couldn't have
fs/inode.c  +106 -48
··· 28 28 #include <linux/cred.h> 29 29 30 30 /* 31 + * inode locking rules. 32 + * 33 + * inode->i_lock protects: 34 + * inode->i_state, inode->i_hash, __iget() 35 + * 36 + * Lock ordering: 37 + * inode_lock 38 + * inode->i_lock 39 + */ 40 + 41 + /* 31 42 * This is needed for the following functions: 32 43 * - inode_has_buffers 33 44 * - invalidate_bdev ··· 147 136 return proc_dointvec(table, write, buffer, lenp, ppos); 148 137 } 149 138 #endif 150 - 151 - static void wake_up_inode(struct inode *inode) 152 - { 153 - /* 154 - * Prevent speculative execution through spin_unlock(&inode_lock); 155 - */ 156 - smp_mb(); 157 - wake_up_bit(&inode->i_state, __I_NEW); 158 - } 159 139 160 140 /** 161 141 * inode_init_always - perform inode structure intialisation ··· 338 336 } 339 337 340 338 /* 341 - * inode_lock must be held 339 + * inode->i_lock must be held 342 340 */ 343 341 void __iget(struct inode *inode) 344 342 { ··· 415 413 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 416 414 417 415 spin_lock(&inode_lock); 416 + spin_lock(&inode->i_lock); 418 417 hlist_add_head(&inode->i_hash, b); 418 + spin_unlock(&inode->i_lock); 419 419 spin_unlock(&inode_lock); 420 420 } 421 421 EXPORT_SYMBOL(__insert_inode_hash); ··· 442 438 void remove_inode_hash(struct inode *inode) 443 439 { 444 440 spin_lock(&inode_lock); 441 + spin_lock(&inode->i_lock); 445 442 hlist_del_init(&inode->i_hash); 443 + spin_unlock(&inode->i_lock); 446 444 spin_unlock(&inode_lock); 447 445 } 448 446 EXPORT_SYMBOL(remove_inode_hash); ··· 501 495 __inode_sb_list_del(inode); 502 496 spin_unlock(&inode_lock); 503 497 504 - wake_up_inode(inode); 498 + spin_lock(&inode->i_lock); 499 + wake_up_bit(&inode->i_state, __I_NEW); 500 + spin_unlock(&inode->i_lock); 505 501 destroy_inode(inode); 506 502 } 507 503 } ··· 526 518 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 527 519 if (atomic_read(&inode->i_count)) 528 520 continue; 529 - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 521 + 522 + spin_lock(&inode->i_lock); 523 + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 524 + spin_unlock(&inode->i_lock); 530 525 continue; 526 + } 531 527 532 528 inode->i_state |= I_FREEING; 529 + if (!(inode->i_state & (I_DIRTY | I_SYNC))) 530 + inodes_stat.nr_unused--; 531 + spin_unlock(&inode->i_lock); 533 532 534 533 /* 535 534 * Move the inode off the IO lists and LRU once I_FREEING is ··· 544 529 */ 545 530 list_move(&inode->i_lru, &dispose); 546 531 list_del_init(&inode->i_wb_list); 547 - if (!(inode->i_state & (I_DIRTY | I_SYNC))) 548 - inodes_stat.nr_unused--; 549 532 } 550 533 spin_unlock(&inode_lock); 551 534 ··· 576 563 577 564 spin_lock(&inode_lock); 578 565 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 579 - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 566 + spin_lock(&inode->i_lock); 567 + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 568 + spin_unlock(&inode->i_lock); 580 569 continue; 570 + } 581 571 if (inode->i_state & I_DIRTY && !kill_dirty) { 572 + spin_unlock(&inode->i_lock); 582 573 busy = 1; 583 574 continue; 584 575 } 585 576 if (atomic_read(&inode->i_count)) { 577 + spin_unlock(&inode->i_lock); 586 578 busy = 1; 587 579 continue; 588 580 } 589 581 590 582 inode->i_state |= I_FREEING; 583 + if (!(inode->i_state & (I_DIRTY | I_SYNC))) 584 + inodes_stat.nr_unused--; 585 + spin_unlock(&inode->i_lock); 591 586 592 587 /* 593 588 * Move the inode off the IO lists and LRU once I_FREEING is ··· 603 582 */ 604 583 list_move(&inode->i_lru, 
&dispose); 605 584 list_del_init(&inode->i_wb_list); 606 - if (!(inode->i_state & (I_DIRTY | I_SYNC))) 607 - inodes_stat.nr_unused--; 608 585 } 609 586 spin_unlock(&inode_lock); 610 587 ··· 660 641 * Referenced or dirty inodes are still in use. Give them 661 642 * another pass through the LRU as we canot reclaim them now. 662 643 */ 644 + spin_lock(&inode->i_lock); 663 645 if (atomic_read(&inode->i_count) || 664 646 (inode->i_state & ~I_REFERENCED)) { 647 + spin_unlock(&inode->i_lock); 665 648 list_del_init(&inode->i_lru); 666 649 inodes_stat.nr_unused--; 667 650 continue; ··· 671 650 672 651 /* recently referenced inodes get one more pass */ 673 652 if (inode->i_state & I_REFERENCED) { 674 - list_move(&inode->i_lru, &inode_lru); 675 653 inode->i_state &= ~I_REFERENCED; 654 + spin_unlock(&inode->i_lock); 655 + list_move(&inode->i_lru, &inode_lru); 676 656 continue; 677 657 } 678 658 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 679 659 __iget(inode); 660 + spin_unlock(&inode->i_lock); 680 661 spin_unlock(&inode_lock); 681 662 if (remove_inode_buffers(inode)) 682 663 reap += invalidate_mapping_pages(&inode->i_data, ··· 689 666 if (inode != list_entry(inode_lru.next, 690 667 struct inode, i_lru)) 691 668 continue; /* wrong inode or list_empty */ 692 - if (!can_unuse(inode)) 669 + spin_lock(&inode->i_lock); 670 + if (!can_unuse(inode)) { 671 + spin_unlock(&inode->i_lock); 693 672 continue; 673 + } 694 674 } 695 675 WARN_ON(inode->i_state & I_NEW); 696 676 inode->i_state |= I_FREEING; 677 + spin_unlock(&inode->i_lock); 697 678 698 679 /* 699 680 * Move the inode off the IO lists and LRU once I_FREEING is ··· 764 737 continue; 765 738 if (!test(inode, data)) 766 739 continue; 740 + spin_lock(&inode->i_lock); 767 741 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 768 742 __wait_on_freeing_inode(inode); 769 743 goto repeat; 770 744 } 771 745 __iget(inode); 746 + spin_unlock(&inode->i_lock); 772 747 return inode; 773 748 } 774 749 return NULL; ··· 792 763 continue; 793 764 if (inode->i_sb != sb) 794 765 continue; 766 + spin_lock(&inode->i_lock); 795 767 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 796 768 __wait_on_freeing_inode(inode); 797 769 goto repeat; 798 770 } 799 771 __iget(inode); 772 + spin_unlock(&inode->i_lock); 800 773 return inode; 801 774 } 802 775 return NULL; ··· 863 832 inode = alloc_inode(sb); 864 833 if (inode) { 865 834 spin_lock(&inode_lock); 866 - __inode_sb_list_add(inode); 835 + spin_lock(&inode->i_lock); 867 836 inode->i_state = 0; 837 + spin_unlock(&inode->i_lock); 838 + __inode_sb_list_add(inode); 868 839 spin_unlock(&inode_lock); 869 840 } 870 841 return inode; 871 842 } 872 843 EXPORT_SYMBOL(new_inode); 873 844 845 + /** 846 + * unlock_new_inode - clear the I_NEW state and wake up any waiters 847 + * @inode: new inode to unlock 848 + * 849 + * Called when the inode is fully initialised to clear the new state of the 850 + * inode and wake up anyone waiting for the inode to finish initialisation. 851 + */ 874 852 void unlock_new_inode(struct inode *inode) 875 853 { 876 854 #ifdef CONFIG_DEBUG_LOCK_ALLOC ··· 899 859 } 900 860 } 901 861 #endif 902 - /* 903 - * This is special! We do not need the spinlock when clearing I_NEW, 904 - * because we're guaranteed that nobody else tries to do anything about 905 - * the state of the inode when it is locked, as we just created it (so 906 - * there can be no old holders that haven't tested I_NEW). 
907 - * However we must emit the memory barrier so that other CPUs reliably 908 - * see the clearing of I_NEW after the other inode initialisation has 909 - * completed. 910 - */ 911 - smp_mb(); 862 + spin_lock(&inode->i_lock); 912 863 WARN_ON(!(inode->i_state & I_NEW)); 913 864 inode->i_state &= ~I_NEW; 914 - wake_up_inode(inode); 865 + wake_up_bit(&inode->i_state, __I_NEW); 866 + spin_unlock(&inode->i_lock); 915 867 } 916 868 EXPORT_SYMBOL(unlock_new_inode); 917 869 ··· 932 900 if (set(inode, data)) 933 901 goto set_failed; 934 902 935 - hlist_add_head(&inode->i_hash, head); 936 - __inode_sb_list_add(inode); 903 + spin_lock(&inode->i_lock); 937 904 inode->i_state = I_NEW; 905 + hlist_add_head(&inode->i_hash, head); 906 + spin_unlock(&inode->i_lock); 907 + __inode_sb_list_add(inode); 938 908 spin_unlock(&inode_lock); 939 909 940 910 /* Return the locked inode with I_NEW set, the ··· 981 947 old = find_inode_fast(sb, head, ino); 982 948 if (!old) { 983 949 inode->i_ino = ino; 984 - hlist_add_head(&inode->i_hash, head); 985 - __inode_sb_list_add(inode); 950 + spin_lock(&inode->i_lock); 986 951 inode->i_state = I_NEW; 952 + hlist_add_head(&inode->i_hash, head); 953 + spin_unlock(&inode->i_lock); 954 + __inode_sb_list_add(inode); 987 955 spin_unlock(&inode_lock); 988 956 989 957 /* Return the locked inode with I_NEW set, the ··· 1070 1034 struct inode *igrab(struct inode *inode) 1071 1035 { 1072 1036 spin_lock(&inode_lock); 1073 - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 1037 + spin_lock(&inode->i_lock); 1038 + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 1074 1039 __iget(inode); 1075 - else 1040 + spin_unlock(&inode->i_lock); 1041 + } else { 1042 + spin_unlock(&inode->i_lock); 1076 1043 /* 1077 1044 * Handle the case where s_op->clear_inode is not been 1078 1045 * called yet, and somebody is calling igrab 1079 1046 * while the inode is getting freed. 
1080 1047 */ 1081 1048 inode = NULL; 1049 + } 1082 1050 spin_unlock(&inode_lock); 1083 1051 return inode; 1084 1052 } ··· 1311 1271 ino_t ino = inode->i_ino; 1312 1272 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1313 1273 1314 - inode->i_state |= I_NEW; 1315 1274 while (1) { 1316 1275 struct hlist_node *node; 1317 1276 struct inode *old = NULL; ··· 1320 1281 continue; 1321 1282 if (old->i_sb != sb) 1322 1283 continue; 1323 - if (old->i_state & (I_FREEING|I_WILL_FREE)) 1284 + spin_lock(&old->i_lock); 1285 + if (old->i_state & (I_FREEING|I_WILL_FREE)) { 1286 + spin_unlock(&old->i_lock); 1324 1287 continue; 1288 + } 1325 1289 break; 1326 1290 } 1327 1291 if (likely(!node)) { 1292 + spin_lock(&inode->i_lock); 1293 + inode->i_state |= I_NEW; 1328 1294 hlist_add_head(&inode->i_hash, head); 1295 + spin_unlock(&inode->i_lock); 1329 1296 spin_unlock(&inode_lock); 1330 1297 return 0; 1331 1298 } 1332 1299 __iget(old); 1300 + spin_unlock(&old->i_lock); 1333 1301 spin_unlock(&inode_lock); 1334 1302 wait_on_inode(old); 1335 1303 if (unlikely(!inode_unhashed(old))) { ··· 1354 1308 struct super_block *sb = inode->i_sb; 1355 1309 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1356 1310 1357 - inode->i_state |= I_NEW; 1358 - 1359 1311 while (1) { 1360 1312 struct hlist_node *node; 1361 1313 struct inode *old = NULL; ··· 1364 1320 continue; 1365 1321 if (!test(old, data)) 1366 1322 continue; 1367 - if (old->i_state & (I_FREEING|I_WILL_FREE)) 1323 + spin_lock(&old->i_lock); 1324 + if (old->i_state & (I_FREEING|I_WILL_FREE)) { 1325 + spin_unlock(&old->i_lock); 1368 1326 continue; 1327 + } 1369 1328 break; 1370 1329 } 1371 1330 if (likely(!node)) { 1331 + spin_lock(&inode->i_lock); 1332 + inode->i_state |= I_NEW; 1372 1333 hlist_add_head(&inode->i_hash, head); 1334 + spin_unlock(&inode->i_lock); 1373 1335 spin_unlock(&inode_lock); 1374 1336 return 0; 1375 1337 } 1376 1338 __iget(old); 1339 + spin_unlock(&old->i_lock); 1377 1340 spin_unlock(&inode_lock); 1378 1341 wait_on_inode(old); 1379 1342 if (unlikely(!inode_unhashed(old))) { ··· 1426 1375 const struct super_operations *op = inode->i_sb->s_op; 1427 1376 int drop; 1428 1377 1378 + spin_lock(&inode->i_lock); 1379 + WARN_ON(inode->i_state & I_NEW); 1380 + 1429 1381 if (op && op->drop_inode) 1430 1382 drop = op->drop_inode(inode); 1431 1383 else ··· 1440 1386 if (!(inode->i_state & (I_DIRTY|I_SYNC))) { 1441 1387 inode_lru_list_add(inode); 1442 1388 } 1389 + spin_unlock(&inode->i_lock); 1443 1390 spin_unlock(&inode_lock); 1444 1391 return; 1445 1392 } 1446 - WARN_ON(inode->i_state & I_NEW); 1447 1393 inode->i_state |= I_WILL_FREE; 1394 + spin_unlock(&inode->i_lock); 1448 1395 spin_unlock(&inode_lock); 1449 1396 write_inode_now(inode, 1); 1450 1397 spin_lock(&inode_lock); 1398 + spin_lock(&inode->i_lock); 1451 1399 WARN_ON(inode->i_state & I_NEW); 1452 1400 inode->i_state &= ~I_WILL_FREE; 1453 1401 __remove_inode_hash(inode); 1454 1402 } 1455 1403 1456 - WARN_ON(inode->i_state & I_NEW); 1457 1404 inode->i_state |= I_FREEING; 1405 + spin_unlock(&inode->i_lock); 1458 1406 1459 1407 /* 1460 1408 * Move the inode off the IO lists and LRU once I_FREEING is ··· 1469 1413 spin_unlock(&inode_lock); 1470 1414 evict(inode); 1471 1415 remove_inode_hash(inode); 1472 - wake_up_inode(inode); 1416 + spin_lock(&inode->i_lock); 1417 + wake_up_bit(&inode->i_state, __I_NEW); 1473 1418 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 1419 + spin_unlock(&inode->i_lock); 1474 1420 destroy_inode(inode); 1475 1421 } 1476 1422 ··· 1669 1611 * to recheck 
inode state. 1670 1612 * 1671 1613 * It doesn't matter if I_NEW is not set initially, a call to 1672 - * wake_up_inode() after removing from the hash list will DTRT. 1673 - * 1674 - * This is called with inode_lock held. 1614 + * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list 1615 + * will DTRT. 1675 1616 */ 1676 1617 static void __wait_on_freeing_inode(struct inode *inode) 1677 1618 { ··· 1678 1621 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1679 1622 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1680 1623 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1624 + spin_unlock(&inode->i_lock); 1681 1625 spin_unlock(&inode_lock); 1682 1626 schedule(); 1683 1627 finish_wait(wq, &wait.wait);
fs/notify/inode_mark.c  +15 -6
··· 254 254 * I_WILL_FREE, or I_NEW which is fine because by that point 255 255 * the inode cannot have any associated watches. 256 256 */ 257 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 257 + spin_lock(&inode->i_lock); 258 + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { 259 + spin_unlock(&inode->i_lock); 258 260 continue; 261 + } 259 262 260 263 /* 261 264 * If i_count is zero, the inode cannot have any watches and ··· 266 263 * evict all inodes with zero i_count from icache which is 267 264 * unnecessarily violent and may in fact be illegal to do. 268 265 */ 269 - if (!atomic_read(&inode->i_count)) 266 + if (!atomic_read(&inode->i_count)) { 267 + spin_unlock(&inode->i_lock); 270 268 continue; 269 + } 271 270 272 271 need_iput_tmp = need_iput; 273 272 need_iput = NULL; ··· 279 274 __iget(inode); 280 275 else 281 276 need_iput_tmp = NULL; 277 + spin_unlock(&inode->i_lock); 282 278 283 279 /* In case the dropping of a reference would nuke next_i. */ 284 280 if ((&next_i->i_sb_list != list) && 285 - atomic_read(&next_i->i_count) && 286 - !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 287 - __iget(next_i); 288 - need_iput = next_i; 281 + atomic_read(&next_i->i_count)) { 282 + spin_lock(&next_i->i_lock); 283 + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 284 + __iget(next_i); 285 + need_iput = next_i; 286 + } 287 + spin_unlock(&next_i->i_lock); 289 288 } 290 289 291 290 /*
fs/quota/dquot.c  +7 -6
··· 902 902 903 903 spin_lock(&inode_lock); 904 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 905 - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 905 + spin_lock(&inode->i_lock); 906 + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 907 + !atomic_read(&inode->i_writecount) || 908 + !dqinit_needed(inode, type)) { 909 + spin_unlock(&inode->i_lock); 906 910 continue; 911 + } 907 912 #ifdef CONFIG_QUOTA_DEBUG 908 913 if (unlikely(inode_get_rsv_space(inode) > 0)) 909 914 reserved = 1; 910 915 #endif 911 - if (!atomic_read(&inode->i_writecount)) 912 - continue; 913 - if (!dqinit_needed(inode, type)) 914 - continue; 915 - 916 916 __iget(inode); 917 + spin_unlock(&inode->i_lock); 917 918 spin_unlock(&inode_lock); 918 919 919 920 iput(old_inode);
include/linux/fs.h  +1 -1
··· 1647 1647 }; 1648 1648 1649 1649 /* 1650 - * Inode state bits. Protected by inode_lock. 1650 + * Inode state bits. Protected by inode->i_lock 1651 1651 * 1652 1652 * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, 1653 1653 * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
include/linux/quotaops.h  +1 -1
··· 277 277 /* 278 278 * Mark inode fully dirty. Since we are allocating blocks, inode 279 279 * would become fully dirty soon anyway and it reportedly 280 - * reduces inode_lock contention. 280 + * reduces lock contention. 281 281 */ 282 282 mark_inode_dirty(inode); 283 283 }
mm/filemap.c  +2
··· 99 99 * ->private_lock (page_remove_rmap->set_page_dirty) 100 100 * ->tree_lock (page_remove_rmap->set_page_dirty) 101 101 * ->inode_lock (page_remove_rmap->set_page_dirty) 102 + * ->inode->i_lock (page_remove_rmap->set_page_dirty) 102 103 * ->inode_lock (zap_pte_range->set_page_dirty) 104 + * ->inode->i_lock (zap_pte_range->set_page_dirty) 103 105 * ->private_lock (zap_pte_range->__set_page_dirty_buffers) 104 106 * 105 107 * (code doesn't rely on that order, so you could switch it around)
mm/rmap.c  +1
··· 32 32 * mmlist_lock (in mmput, drain_mmlist and others) 33 33 * mapping->private_lock (in __set_page_dirty_buffers) 34 34 * inode_lock (in set_page_dirty's __mark_inode_dirty) 35 + * inode->i_lock (in set_page_dirty's __mark_inode_dirty) 35 36 * sb_lock (within inode_lock in fs/fs-writeback.c) 36 37 * mapping->tree_lock (widely used, in set_page_dirty, 37 38 * in arch-dependent flush_dcache_mmap_lock,