+1
fs/inode.c
+1
fs/inode.c
+1
include/linux/fs.h
+1
include/linux/fs.h
+10
-7
include/linux/futex.h
+10
-7
include/linux/futex.h
···
31
31
32
32
union futex_key {
33
33
struct {
34
+
u64 i_seq;
34
35
unsigned long pgoff;
35
-
struct inode *inode;
36
-
int offset;
36
+
unsigned int offset;
37
37
} shared;
38
38
struct {
39
+
union {
40
+
struct mm_struct *mm;
41
+
u64 __tmp;
42
+
};
39
43
unsigned long address;
40
-
struct mm_struct *mm;
41
-
int offset;
44
+
unsigned int offset;
42
45
} private;
43
46
struct {
47
+
u64 ptr;
44
48
unsigned long word;
45
-
void *ptr;
46
-
int offset;
49
+
unsigned int offset;
47
50
} both;
48
51
};
49
52
50
-
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
53
+
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
51
54
52
55
#ifdef CONFIG_FUTEX
53
56
enum {
+53
-36
kernel/futex.c
+53
-36
kernel/futex.c
···
429
429
430
430
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
431
431
case FUT_OFF_INODE:
432
-
ihold(key->shared.inode); /* implies smp_mb(); (B) */
432
+
smp_mb(); /* explicit smp_mb(); (B) */
433
433
break;
434
434
case FUT_OFF_MMSHARED:
435
435
futex_get_mm(key); /* implies smp_mb(); (B) */
···
463
463
464
464
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
465
465
case FUT_OFF_INODE:
466
-
iput(key->shared.inode);
467
466
break;
468
467
case FUT_OFF_MMSHARED:
469
468
mmdrop(key->private.mm);
···
504
505
return timeout;
505
506
}
506
507
508
+
/*
509
+
* Generate a machine wide unique identifier for this inode.
510
+
*
511
+
* This relies on u64 not wrapping in the life-time of the machine; which with
512
+
* 1ns resolution means almost 585 years.
513
+
*
514
+
* This further relies on the fact that a well formed program will not unmap
515
+
* the file while it has a (shared) futex waiting on it. This mapping will have
516
+
* a file reference which pins the mount and inode.
517
+
*
518
+
* If for some reason an inode gets evicted and read back in again, it will get
519
+
* a new sequence number and will _NOT_ match, even though it is the exact same
520
+
* file.
521
+
*
522
+
* It is important that match_futex() will never have a false-positive, esp.
523
+
* for PI futexes that can mess up the state. The above argues that false-negatives
524
+
* are only possible for malformed programs.
525
+
*/
526
+
static u64 get_inode_sequence_number(struct inode *inode)
527
+
{
528
+
static atomic64_t i_seq;
529
+
u64 old;
530
+
531
+
/* Does the inode already have a sequence number? */
532
+
old = atomic64_read(&inode->i_sequence);
533
+
if (likely(old))
534
+
return old;
535
+
536
+
for (;;) {
537
+
u64 new = atomic64_add_return(1, &i_seq);
538
+
if (WARN_ON_ONCE(!new))
539
+
continue;
540
+
541
+
old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
542
+
if (old)
543
+
return old;
544
+
return new;
545
+
}
546
+
}
547
+
507
548
/**
508
549
* get_futex_key() - Get parameters which are the keys for a futex
509
550
* @uaddr: virtual address of the futex
···
556
517
*
557
518
* The key words are stored in @key on success.
558
519
*
559
-
* For shared mappings, it's (page->index, file_inode(vma->vm_file),
560
-
* offset_within_page). For private mappings, it's (uaddr, current->mm).
561
-
* We can usually work out the index without swapping in the page.
520
+
* For shared mappings (when @fshared), the key is:
521
+
* ( inode->i_sequence, page->index, offset_within_page )
522
+
* [ also see get_inode_sequence_number() ]
523
+
*
524
+
* For private mappings (or when !@fshared), the key is:
525
+
* ( current->mm, address, 0 )
526
+
*
527
+
* This allows (cross process, where applicable) identification of the futex
528
+
* without keeping the page pinned for the duration of the FUTEX_WAIT.
562
529
*
563
530
* lock_page() might sleep, the caller should not hold a spinlock.
564
531
*/
···
704
659
key->private.mm = mm;
705
660
key->private.address = address;
706
661
707
-
get_futex_key_refs(key); /* implies smp_mb(); (B) */
708
-
709
662
} else {
710
663
struct inode *inode;
711
664
···
735
692
goto again;
736
693
}
737
694
738
-
/*
739
-
* Take a reference unless it is about to be freed. Previously
740
-
* this reference was taken by ihold under the page lock
741
-
* pinning the inode in place so i_lock was unnecessary. The
742
-
* only way for this check to fail is if the inode was
743
-
* truncated in parallel which is almost certainly an
744
-
* application bug. In such a case, just retry.
745
-
*
746
-
* We are not calling into get_futex_key_refs() in file-backed
747
-
* cases, therefore a successful atomic_inc return below will
748
-
* guarantee that get_futex_key() will still imply smp_mb(); (B).
749
-
*/
750
-
if (!atomic_inc_not_zero(&inode->i_count)) {
751
-
rcu_read_unlock();
752
-
put_page(page);
753
-
754
-
goto again;
755
-
}
756
-
757
-
/* Should be impossible but lets be paranoid for now */
758
-
if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
759
-
err = -EFAULT;
760
-
rcu_read_unlock();
761
-
iput(inode);
762
-
763
-
goto out;
764
-
}
765
-
766
695
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
767
-
key->shared.inode = inode;
696
+
key->shared.i_seq = get_inode_sequence_number(inode);
768
697
key->shared.pgoff = basepage_index(tail);
769
698
rcu_read_unlock();
770
699
}
700
+
701
+
get_futex_key_refs(key); /* implies smp_mb(); (B) */
771
702
772
703
out:
773
704
put_page(page);