···125125#include <linux/syscalls.h>126126#include <linux/time.h>127127#include <linux/rcupdate.h>128128+#include <linux/pid_namespace.h>128129129130#include <asm/semaphore.h>130131#include <asm/uaccess.h>···186185 fl->fl_fasync = NULL;187186 fl->fl_owner = NULL;188187 fl->fl_pid = 0;188188+ fl->fl_nspid = NULL;189189 fl->fl_file = NULL;190190 fl->fl_flags = 0;191191 fl->fl_type = 0;···555553{556554 list_add(&fl->fl_link, &file_lock_list);557555556556+ fl->fl_nspid = get_pid(task_tgid(current));557557+558558 /* insert into file's list */559559 fl->fl_next = *pos;560560 *pos = fl;···587583588584 if (fl->fl_ops && fl->fl_ops->fl_remove)589585 fl->fl_ops->fl_remove(fl);586586+587587+ if (fl->fl_nspid) {588588+ put_pid(fl->fl_nspid);589589+ fl->fl_nspid = NULL;590590+ }590591591592 locks_wake_up_blocks(fl);592593 locks_free_lock(fl);···643634 return (locks_conflict(caller_fl, sys_fl));644635}645636646646-static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)647647-{648648- int result = 0;649649- DECLARE_WAITQUEUE(wait, current);650650-651651- __set_current_state(TASK_INTERRUPTIBLE);652652- add_wait_queue(fl_wait, &wait);653653- if (timeout == 0)654654- schedule();655655- else656656- result = schedule_timeout(timeout);657657- if (signal_pending(current))658658- result = -ERESTARTSYS;659659- remove_wait_queue(fl_wait, &wait);660660- __set_current_state(TASK_RUNNING);661661- return result;662662-}663663-664664-static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)665665-{666666- int result;667667- locks_insert_block(blocker, waiter);668668- result = interruptible_sleep_on_locked(&waiter->fl_wait, time);669669- __locks_delete_block(waiter);670670- return result;671671-}672672-673637void674638posix_test_lock(struct file *filp, struct file_lock *fl)675639{···655673 if (posix_locks_conflict(fl, cfl))656674 break;657675 }658658- if (cfl)676676+ if (cfl) {659677 __locks_copy_lock(fl, cfl);660660- else678678+ 
if (cfl->fl_nspid)679679+ fl->fl_pid = pid_nr_ns(cfl->fl_nspid,680680+ task_active_pid_ns(current));681681+ } else661682 fl->fl_type = F_UNLCK;662683 unlock_kernel();663684 return;664685}665665-666686EXPORT_SYMBOL(posix_test_lock);667687668668-/* This function tests for deadlock condition before putting a process to669669- * sleep. The detection scheme is no longer recursive. Recursive was neat,670670- * but dangerous - we risked stack corruption if the lock data was bad, or671671- * if the recursion was too deep for any other reason.688688+/*689689+ * Deadlock detection:672690 *673673- * We rely on the fact that a task can only be on one lock's wait queue674674- * at a time. When we find blocked_task on a wait queue we can re-search675675- * with blocked_task equal to that queue's owner, until either blocked_task676676- * isn't found, or blocked_task is found on a queue owned by my_task.691691+ * We attempt to detect deadlocks that are due purely to posix file692692+ * locks.677693 *678678- * Note: the above assumption may not be true when handling lock requests679679- * from a broken NFS client. But broken NFS clients have a lot more to680680- * worry about than proper deadlock detection anyway... --okir694694+ * We assume that a task can be waiting for at most one lock at a time.695695+ * So for any acquired lock, the process holding that lock may be696696+ * waiting on at most one other lock. That lock in turn may be held by697697+ * someone waiting for at most one other lock. Given a requested lock698698+ * caller_fl which is about to wait for a conflicting lock block_fl, we699699+ * follow this chain of waiters to ensure we are not about to create a700700+ * cycle.681701 *682682- * However, the failure of this assumption (also possible in the case of683683- * multiple tasks sharing the same open file table) also means there's no684684- * guarantee that the loop below will terminate. 
As a hack, we give up685685- * after a few iterations.702702+ * Since we do this before we ever put a process to sleep on a lock, we703703+ * are assured that there is never a cycle; that is what guarantees that704704+ * the while() loop in posix_locks_deadlock() eventually completes.705705+ *706706+ * Note: the above assumption may not be true when handling lock707707+ * requests from a broken NFS client. It may also fail in the presence708708+ * of tasks (such as posix threads) sharing the same open file table.709709+ *710710+ * To handle those cases, we just bail out after a few iterations.686711 */687712688713#define MAX_DEADLK_ITERATIONS 10689714715715+/* Find a lock that the owner of the given block_fl is blocking on. */716716+static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)717717+{718718+ struct file_lock *fl;719719+720720+ list_for_each_entry(fl, &blocked_list, fl_link) {721721+ if (posix_same_owner(fl, block_fl))722722+ return fl->fl_next;723723+ }724724+ return NULL;725725+}726726+690727static int posix_locks_deadlock(struct file_lock *caller_fl,691728 struct file_lock *block_fl)692729{693693- struct file_lock *fl;694730 int i = 0;695731696696-next_task:697697- if (posix_same_owner(caller_fl, block_fl))698698- return 1;699699- list_for_each_entry(fl, &blocked_list, fl_link) {700700- if (posix_same_owner(fl, block_fl)) {701701- if (i++ > MAX_DEADLK_ITERATIONS)702702- return 0;703703- fl = fl->fl_next;704704- block_fl = fl;705705- goto next_task;706706- }732732+ while ((block_fl = what_owner_is_waiting_for(block_fl))) {733733+ if (i++ > MAX_DEADLK_ITERATIONS)734734+ return 0;735735+ if (posix_same_owner(caller_fl, block_fl))736736+ return 1;707737 }708738 return 0;709739}···12501256 if (break_time == 0)12511257 break_time++;12521258 }12531253- error = locks_block_on_timeout(flock, new_fl, break_time);12591259+ locks_insert_block(flock, new_fl);12601260+ error = wait_event_interruptible_timeout(new_fl->fl_wait,12611261+ 
!new_fl->fl_next, break_time);12621262+ __locks_delete_block(new_fl);12541263 if (error >= 0) {12551264 if (error == 0)12561265 time_out_leases(inode);···20812084 int id, char *pfx)20822085{20832086 struct inode *inode = NULL;20872087+ unsigned int fl_pid;20882088+20892089+ if (fl->fl_nspid)20902090+ fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current));20912091+ else20922092+ fl_pid = fl->fl_pid;2084209320852094 if (fl->fl_file != NULL)20862095 inode = fl->fl_file->f_path.dentry->d_inode;···21272124 }21282125 if (inode) {21292126#ifdef WE_CAN_BREAK_LSLK_NOW21302130- seq_printf(f, "%d %s:%ld ", fl->fl_pid,21272127+ seq_printf(f, "%d %s:%ld ", fl_pid,21312128 inode->i_sb->s_id, inode->i_ino);21322129#else21332130 /* userspace relies on this representation of dev_t ;-( */21342134- seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid,21312131+ seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,21352132 MAJOR(inode->i_sb->s_dev),21362133 MINOR(inode->i_sb->s_dev), inode->i_ino);21372134#endif21382135 } else {21392139- seq_printf(f, "%d <none>:0 ", fl->fl_pid);21362136+ seq_printf(f, "%d <none>:0 ", fl_pid);21402137 }21412138 if (IS_POSIX(fl)) {21422139 if (fl->fl_end == OFFSET_MAX)
+1
include/linux/fs.h
···872872 struct list_head fl_block; /* circular list of blocked processes */873873 fl_owner_t fl_owner;874874 unsigned int fl_pid;875875+ struct pid *fl_nspid;875876 wait_queue_head_t fl_wait;876877 struct file *fl_file;877878 unsigned char fl_flags;