Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

wait_on_bit: add an acquire memory barrier

There are several places in the kernel where wait_on_bit is not followed
by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read).

On architectures with weak memory ordering, memory accesses that follow
wait_on_bit may be reordered before the bit test completes, so they can
observe stale data.

Fix this class of bugs by introducing a new function "test_bit_acquire"
that works like test_bit, but has acquire memory ordering semantics.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Mikulas Patocka and committed by
Linus Torvalds
8238b457 4c612826

+60 -12
+4 -6
Documentation/atomic_bitops.txt
··· 58 58 59 59 - RMW operations that have a return value are fully ordered. 60 60 61 - - RMW operations that are conditional are unordered on FAILURE, 62 - otherwise the above rules apply. In the case of test_and_set_bit_lock(), 63 - if the bit in memory is unchanged by the operation then it is deemed to have 64 - failed. 61 + - RMW operations that are conditional are fully ordered. 65 62 66 - Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and 67 - clear_bit_unlock() which has RELEASE semantics. 63 + Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, 64 + clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has 65 + ACQUIRE semantics. 68 66 69 67 Since a platform only has a single means of achieving atomic operations 70 68 the same barriers as for atomic_t are used, see atomic_t.txt.
+21
arch/x86/include/asm/bitops.h
··· 207 207 (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; 208 208 } 209 209 210 + static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) 211 + { 212 + bool oldbit; 213 + 214 + asm volatile("testb %2,%1" 215 + CC_SET(nz) 216 + : CC_OUT(nz) (oldbit) 217 + : "m" (((unsigned char *)addr)[nr >> 3]), 218 + "i" (1 << (nr & 7)) 219 + :"memory"); 220 + 221 + return oldbit; 222 + } 223 + 210 224 static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) 211 225 { 212 226 bool oldbit; ··· 237 223 arch_test_bit(unsigned long nr, const volatile unsigned long *addr) 238 224 { 239 225 return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) : 226 + variable_test_bit(nr, addr); 227 + } 228 + 229 + static __always_inline bool 230 + arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) 231 + { 232 + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : 240 233 variable_test_bit(nr, addr); 241 234 } 242 235
+14
include/asm-generic/bitops/generic-non-atomic.h
··· 4 4 #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H 5 5 6 6 #include <linux/bits.h> 7 + #include <asm/barrier.h> 7 8 8 9 #ifndef _LINUX_BITOPS_H 9 10 #error only <linux/bitops.h> can be included directly ··· 128 127 return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); 129 128 } 130 129 130 + /** 131 + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set 132 + * @nr: bit number to test 133 + * @addr: Address to start counting from 134 + */ 135 + static __always_inline bool 136 + generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) 137 + { 138 + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); 139 + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); 140 + } 141 + 131 142 /* 132 143 * const_*() definitions provide good compile-time optimizations when 133 144 * the passed arguments can be resolved at compile time. ··· 150 137 #define const___test_and_set_bit generic___test_and_set_bit 151 138 #define const___test_and_clear_bit generic___test_and_clear_bit 152 139 #define const___test_and_change_bit generic___test_and_change_bit 140 + #define const_test_bit_acquire generic_test_bit_acquire 153 141 154 142 /** 155 143 * const_test_bit - Determine whether a bit is set
+12
include/asm-generic/bitops/instrumented-non-atomic.h
··· 142 142 return arch_test_bit(nr, addr); 143 143 } 144 144 145 + /** 146 + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set 147 + * @nr: bit number to test 148 + * @addr: Address to start counting from 149 + */ 150 + static __always_inline bool 151 + _test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) 152 + { 153 + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); 154 + return arch_test_bit_acquire(nr, addr); 155 + } 156 + 145 157 #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
+1
include/asm-generic/bitops/non-atomic.h
··· 13 13 #define arch___test_and_change_bit generic___test_and_change_bit 14 14 15 15 #define arch_test_bit generic_test_bit 16 + #define arch_test_bit_acquire generic_test_bit_acquire 16 17 17 18 #include <asm-generic/bitops/non-instrumented-non-atomic.h> 18 19
+1
include/asm-generic/bitops/non-instrumented-non-atomic.h
··· 12 12 #define ___test_and_change_bit arch___test_and_change_bit 13 13 14 14 #define _test_bit arch_test_bit 15 + #define _test_bit_acquire arch_test_bit_acquire 15 16 16 17 #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */
+1
include/linux/bitops.h
··· 59 59 #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) 60 60 #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) 61 61 #define test_bit(nr, addr) bitop(_test_bit, nr, addr) 62 + #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) 62 63 63 64 /* 64 65 * Include this here because some architectures need generic_ffs/fls in
+1 -1
include/linux/buffer_head.h
··· 156 156 * make it consistent with folio_test_uptodate 157 157 * pairs with smp_mb__before_atomic in set_buffer_uptodate 158 158 */ 159 - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; 159 + return test_bit_acquire(BH_Uptodate, &bh->b_state); 160 160 } 161 161 162 162 #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+4 -4
include/linux/wait_bit.h
··· 71 71 wait_on_bit(unsigned long *word, int bit, unsigned mode) 72 72 { 73 73 might_sleep(); 74 - if (!test_bit(bit, word)) 74 + if (!test_bit_acquire(bit, word)) 75 75 return 0; 76 76 return out_of_line_wait_on_bit(word, bit, 77 77 bit_wait, ··· 96 96 wait_on_bit_io(unsigned long *word, int bit, unsigned mode) 97 97 { 98 98 might_sleep(); 99 - if (!test_bit(bit, word)) 99 + if (!test_bit_acquire(bit, word)) 100 100 return 0; 101 101 return out_of_line_wait_on_bit(word, bit, 102 102 bit_wait_io, ··· 123 123 unsigned long timeout) 124 124 { 125 125 might_sleep(); 126 - if (!test_bit(bit, word)) 126 + if (!test_bit_acquire(bit, word)) 127 127 return 0; 128 128 return out_of_line_wait_on_bit_timeout(word, bit, 129 129 bit_wait_timeout, ··· 151 151 unsigned mode) 152 152 { 153 153 might_sleep(); 154 - if (!test_bit(bit, word)) 154 + if (!test_bit_acquire(bit, word)) 155 155 return 0; 156 156 return out_of_line_wait_on_bit(word, bit, action, mode); 157 157 }
+1 -1
kernel/sched/wait_bit.c
··· 47 47 prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); 48 48 if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) 49 49 ret = (*action)(&wbq_entry->key, mode); 50 - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); 50 + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); 51 51 52 52 finish_wait(wq_head, &wbq_entry->wq_entry); 53 53