Merge branch 'locks' of git://linux-nfs.org/~bfields/linux

* 'locks' of git://linux-nfs.org/~bfields/linux:
pid-namespaces-vs-locks-interaction
file locks: Use wait_event_interruptible_timeout()
locks: clarify posix_locks_deadlock

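The posix_locks_deadlock() rework in the diff below has directly observable userspace behavior: when two processes each hold a POSIX record lock and then block on each other's with F_SETLKW, the kernel walks the chain of waiters and fails one of the two requests with EDEADLK instead of letting both sleep forever. A minimal sketch of that scenario follows; the file path is hypothetical, the sleep()-based rendezvous is crude, and error handling is elided:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/wait.h>
	#include <unistd.h>

	/* Write-lock one byte of fd; wait != 0 uses F_SETLKW (blocking). */
	static int lock_byte(int fd, off_t start, int wait)
	{
		struct flock fl = {
			.l_type   = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start  = start,
			.l_len    = 1,
		};
		return fcntl(fd, wait ? F_SETLKW : F_SETLK, &fl);
	}

	int main(void)
	{
		int fd = open("/tmp/deadlk-demo", O_RDWR | O_CREAT, 0600);

		if (fork() == 0) {		/* child: hold byte 1, then want byte 0 */
			lock_byte(fd, 1, 0);
			sleep(1);
			if (lock_byte(fd, 0, 1) == -1)
				printf("child:  %s\n", strerror(errno));
			_exit(0);
		}
		/* parent: hold byte 0, then want byte 1 */
		lock_byte(fd, 0, 0);
		sleep(1);
		if (lock_byte(fd, 1, 1) == -1)
			printf("parent: %s\n", strerror(errno));
		wait(NULL);
		return 0;
	}

Whichever process enters the kernel second sees the cycle and gets "Resource deadlock avoided". Note that, as the rewritten comment block below says, the detection is best-effort: it bails out after MAX_DEADLK_ITERATIONS steps and can be confused by tasks sharing an open file table.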
2 files changed, 65 insertions(+), 61 deletions(-)

fs/locks.c (+64 -61)
···
 #include <linux/syscalls.h>
 #include <linux/time.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
···
 	fl->fl_fasync = NULL;
 	fl->fl_owner = NULL;
 	fl->fl_pid = 0;
+	fl->fl_nspid = NULL;
 	fl->fl_file = NULL;
 	fl->fl_flags = 0;
 	fl->fl_type = 0;
···
 {
 	list_add(&fl->fl_link, &file_lock_list);
 
+	fl->fl_nspid = get_pid(task_tgid(current));
+
 	/* insert into file's list */
 	fl->fl_next = *pos;
 	*pos = fl;
···
 
 	if (fl->fl_ops && fl->fl_ops->fl_remove)
 		fl->fl_ops->fl_remove(fl);
+
+	if (fl->fl_nspid) {
+		put_pid(fl->fl_nspid);
+		fl->fl_nspid = NULL;
+	}
 
 	locks_wake_up_blocks(fl);
 	locks_free_lock(fl);
···
 	return (locks_conflict(caller_fl, sys_fl));
 }
 
-static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)
-{
-	int result = 0;
-	DECLARE_WAITQUEUE(wait, current);
-
-	__set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(fl_wait, &wait);
-	if (timeout == 0)
-		schedule();
-	else
-		result = schedule_timeout(timeout);
-	if (signal_pending(current))
-		result = -ERESTARTSYS;
-	remove_wait_queue(fl_wait, &wait);
-	__set_current_state(TASK_RUNNING);
-	return result;
-}
-
-static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)
-{
-	int result;
-	locks_insert_block(blocker, waiter);
-	result = interruptible_sleep_on_locked(&waiter->fl_wait, time);
-	__locks_delete_block(waiter);
-	return result;
-}
-
 void
 posix_test_lock(struct file *filp, struct file_lock *fl)
 {
···
 		if (posix_locks_conflict(fl, cfl))
 			break;
 	}
-	if (cfl)
+	if (cfl) {
 		__locks_copy_lock(fl, cfl);
-	else
+		if (cfl->fl_nspid)
+			fl->fl_pid = pid_nr_ns(cfl->fl_nspid,
+					task_active_pid_ns(current));
+	} else
 		fl->fl_type = F_UNLCK;
 	unlock_kernel();
 	return;
 }
-
 EXPORT_SYMBOL(posix_test_lock);
 
-/* This function tests for deadlock condition before putting a process to
- * sleep. The detection scheme is no longer recursive. Recursive was neat,
- * but dangerous - we risked stack corruption if the lock data was bad, or
- * if the recursion was too deep for any other reason.
+/*
+ * Deadlock detection:
  *
- * We rely on the fact that a task can only be on one lock's wait queue
- * at a time. When we find blocked_task on a wait queue we can re-search
- * with blocked_task equal to that queue's owner, until either blocked_task
- * isn't found, or blocked_task is found on a queue owned by my_task.
+ * We attempt to detect deadlocks that are due purely to posix file
+ * locks.
  *
- * Note: the above assumption may not be true when handling lock requests
- * from a broken NFS client. But broken NFS clients have a lot more to
- * worry about than proper deadlock detection anyway... --okir
+ * We assume that a task can be waiting for at most one lock at a time.
+ * So for any acquired lock, the process holding that lock may be
+ * waiting on at most one other lock. That lock in turn may be held by
+ * someone waiting for at most one other lock. Given a requested lock
+ * caller_fl which is about to wait for a conflicting lock block_fl, we
+ * follow this chain of waiters to ensure we are not about to create a
+ * cycle.
  *
- * However, the failure of this assumption (also possible in the case of
- * multiple tasks sharing the same open file table) also means there's no
- * guarantee that the loop below will terminate. As a hack, we give up
- * after a few iterations.
+ * Since we do this before we ever put a process to sleep on a lock, we
+ * are ensured that there is never a cycle; that is what guarantees that
+ * the while() loop in posix_locks_deadlock() eventually completes.
+ *
+ * Note: the above assumption may not be true when handling lock
+ * requests from a broken NFS client. It may also fail in the presence
+ * of tasks (such as posix threads) sharing the same open file table.
+ *
+ * To handle those cases, we just bail out after a few iterations.
  */
 
 #define MAX_DEADLK_ITERATIONS 10
 
+/* Find a lock that the owner of the given block_fl is blocking on. */
+static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
+{
+	struct file_lock *fl;
+
+	list_for_each_entry(fl, &blocked_list, fl_link) {
+		if (posix_same_owner(fl, block_fl))
+			return fl->fl_next;
+	}
+	return NULL;
+}
+
 static int posix_locks_deadlock(struct file_lock *caller_fl,
 				struct file_lock *block_fl)
 {
-	struct file_lock *fl;
 	int i = 0;
 
-next_task:
-	if (posix_same_owner(caller_fl, block_fl))
-		return 1;
-	list_for_each_entry(fl, &blocked_list, fl_link) {
-		if (posix_same_owner(fl, block_fl)) {
-			if (i++ > MAX_DEADLK_ITERATIONS)
-				return 0;
-			fl = fl->fl_next;
-			block_fl = fl;
-			goto next_task;
-		}
+	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
+		if (i++ > MAX_DEADLK_ITERATIONS)
+			return 0;
+		if (posix_same_owner(caller_fl, block_fl))
+			return 1;
 	}
 	return 0;
 }
···
 		if (break_time == 0)
 			break_time++;
 	}
-	error = locks_block_on_timeout(flock, new_fl, break_time);
+	locks_insert_block(flock, new_fl);
+	error = wait_event_interruptible_timeout(new_fl->fl_wait,
+						!new_fl->fl_next, break_time);
+	__locks_delete_block(new_fl);
 	if (error >= 0) {
 		if (error == 0)
 			time_out_leases(inode);
···
 		int id, char *pfx)
 {
 	struct inode *inode = NULL;
+	unsigned int fl_pid;
+
+	if (fl->fl_nspid)
+		fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current));
+	else
+		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_path.dentry->d_inode;
···
 	}
 	if (inode) {
 #ifdef WE_CAN_BREAK_LSLK_NOW
-		seq_printf(f, "%d %s:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %s:%ld ", fl_pid,
 				inode->i_sb->s_id, inode->i_ino);
 #else
 		/* userspace relies on this representation of dev_t ;-( */
-		seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,
+		seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
 				MAJOR(inode->i_sb->s_dev),
 				MINOR(inode->i_sb->s_dev), inode->i_ino);
 #endif
 	} else {
-		seq_printf(f, "%d <none>:0 ", fl->fl_pid);
+		seq_printf(f, "%d <none>:0 ", fl_pid);
 	}
 	if (IS_POSIX(fl)) {
 		if (fl->fl_end == OFFSET_MAX)
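The posix_test_lock() hunk above is what makes F_GETLK report the conflicting holder's pid translated into the caller's pid namespace: pid_nr_ns() maps the stored struct pid into the reader's namespace, yielding 0 when the holder is not visible there. A small sketch of how that surfaces in userspace; the path is hypothetical and the sleep() rendezvous is crude:

	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/tmp/getlk-demo", O_RDWR | O_CREAT, 0600);
		struct flock fl = {
			.l_type   = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 0,	/* whole file */
		};
		pid_t child = fork();

		if (child == 0) {	/* child takes and holds the lock */
			fcntl(fd, F_SETLK, &fl);
			pause();
		}
		sleep(1);
		fcntl(fd, F_GETLK, &fl);
		/*
		 * l_pid is the holder's pid as seen from *this* task's pid
		 * namespace -- computed from fl_nspid by the pid_nr_ns()
		 * call in posix_test_lock() above.
		 */
		printf("lock held by pid %d (child is %d)\n",
				(int)fl.l_pid, (int)child);
		kill(child, SIGKILL);
		wait(NULL);
		return 0;
	}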
include/linux/fs.h (+1 -0)
···
 	struct list_head fl_block;	/* circular list of blocked processes */
 	fl_owner_t fl_owner;
 	unsigned int fl_pid;
+	struct pid *fl_nspid;
 	wait_queue_head_t fl_wait;
 	struct file *fl_file;
 	unsigned char fl_flags;
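The new fl_nspid field holds a counted reference to the holder's struct pid (taken via get_pid() in locks_insert_lock(), dropped via put_pid() in the delete path above), which is what lets lock_get_status() translate the pid column of /proc/locks per reader rather than printing a raw number from some other namespace. A trivial way to inspect that column:

	#include <stdio.h>

	/* Dump /proc/locks; field 5 of each line is the (translated) holder pid. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/locks", "r");

		if (!f) {
			perror("/proc/locks");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}

Run once in the init namespace and once inside a new pid namespace holding the same lock: the same entry shows different pid values, or 0 where the holder has no mapping in the reader's namespace.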