locks: reinstate locks_delete_block optimization

There is measurable performance impact in some synthetic tests due to
commit 6d390e4b5d48 ("locks: fix a potential use-after-free problem when
wakeup a waiter"). Fix the race condition instead by clearing the
fl_blocker pointer after the wake_up, using explicit acquire/release
semantics.

This does mean that we can no longer use the clearing of fl_blocker as
the wait condition, so switch the waiters over to checking whether the
fl_blocked_member list_head is empty.

Reviewed-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Fixes: 6d390e4b5d48 ("locks: fix a potential use-after-free problem when wakeup a waiter")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

2 files changed, 50 insertions(+), 7 deletions(-)
+2 -1
fs/cifs/file.c
··· 1169 1169 rc = posix_lock_file(file, flock, NULL); 1170 1170 up_write(&cinode->lock_sem); 1171 1171 if (rc == FILE_LOCK_DEFERRED) { 1172 - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); 1172 + rc = wait_event_interruptible(flock->fl_wait, 1173 + list_empty(&flock->fl_blocked_member)); 1173 1174 if (!rc) 1174 1175 goto try_again; 1175 1176 locks_delete_block(flock);
+48 -6
fs/locks.c
··· 725 725 { 726 726 locks_delete_global_blocked(waiter); 727 727 list_del_init(&waiter->fl_blocked_member); 728 - waiter->fl_blocker = NULL; 729 728 } 730 729 731 730 static void __locks_wake_up_blocks(struct file_lock *blocker) ··· 739 740 waiter->fl_lmops->lm_notify(waiter); 740 741 else 741 742 wake_up(&waiter->fl_wait); 743 + 744 + /* 745 + * The setting of fl_blocker to NULL marks the "done" 746 + * point in deleting a block. Paired with acquire at the top 747 + * of locks_delete_block(). 748 + */ 749 + smp_store_release(&waiter->fl_blocker, NULL); 742 750 } 743 751 } 744 752 ··· 759 753 { 760 754 int status = -ENOENT; 761 755 756 + /* 757 + * If fl_blocker is NULL, it won't be set again as this thread "owns" 758 + * the lock and is the only one that might try to claim the lock. 759 + * 760 + * We use acquire/release to manage fl_blocker so that we can 761 + * optimize away taking the blocked_lock_lock in many cases. 762 + * 763 + * The smp_load_acquire guarantees two things: 764 + * 765 + * 1/ that fl_blocked_requests can be tested locklessly. If something 766 + * was recently added to that list it must have been in a locked region 767 + * *before* the locked region when fl_blocker was set to NULL. 768 + * 769 + * 2/ that no other thread is accessing 'waiter', so it is safe to free 770 + * it. __locks_wake_up_blocks is careful not to touch waiter after 771 + * fl_blocker is released. 772 + * 773 + * If a lockless check of fl_blocker shows it to be NULL, we know that 774 + * no new locks can be inserted into its fl_blocked_requests list, and 775 + * can avoid doing anything further if the list is empty. 
776 + */ 777 + if (!smp_load_acquire(&waiter->fl_blocker) && 778 + list_empty(&waiter->fl_blocked_requests)) 779 + return status; 780 + 762 781 spin_lock(&blocked_lock_lock); 763 782 if (waiter->fl_blocker) 764 783 status = 0; 765 784 __locks_wake_up_blocks(waiter); 766 785 __locks_delete_block(waiter); 786 + 787 + /* 788 + * The setting of fl_blocker to NULL marks the "done" point in deleting 789 + * a block. Paired with acquire at the top of this function. 790 + */ 791 + smp_store_release(&waiter->fl_blocker, NULL); 767 792 spin_unlock(&blocked_lock_lock); 768 793 return status; 769 794 } ··· 1387 1350 error = posix_lock_inode(inode, fl, NULL); 1388 1351 if (error != FILE_LOCK_DEFERRED) 1389 1352 break; 1390 - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); 1353 + error = wait_event_interruptible(fl->fl_wait, 1354 + list_empty(&fl->fl_blocked_member)); 1391 1355 if (error) 1392 1356 break; 1393 1357 } ··· 1473 1435 error = posix_lock_inode(inode, &fl, NULL); 1474 1436 if (error != FILE_LOCK_DEFERRED) 1475 1437 break; 1476 - error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); 1438 + error = wait_event_interruptible(fl.fl_wait, 1439 + list_empty(&fl.fl_blocked_member)); 1477 1440 if (!error) { 1478 1441 /* 1479 1442 * If we've been sleeping someone might have ··· 1677 1638 1678 1639 locks_dispose_list(&dispose); 1679 1640 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1680 - !new_fl->fl_blocker, break_time); 1641 + list_empty(&new_fl->fl_blocked_member), 1642 + break_time); 1681 1643 1682 1644 percpu_down_read(&file_rwsem); 1683 1645 spin_lock(&ctx->flc_lock); ··· 2162 2122 error = flock_lock_inode(inode, fl); 2163 2123 if (error != FILE_LOCK_DEFERRED) 2164 2124 break; 2165 - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); 2125 + error = wait_event_interruptible(fl->fl_wait, 2126 + list_empty(&fl->fl_blocked_member)); 2166 2127 if (error) 2167 2128 break; 2168 2129 } ··· 2440 2399 error = vfs_lock_file(filp, cmd, 
fl, NULL); 2441 2400 if (error != FILE_LOCK_DEFERRED) 2442 2401 break; 2443 - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); 2402 + error = wait_event_interruptible(fl->fl_wait, 2403 + list_empty(&fl->fl_blocked_member)); 2444 2404 if (error) 2445 2405 break; 2446 2406 }