Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch/tile: fix deadlock bugs in rwlock implementation

The first issue fixed in this patch is that pending rwlock write locks
could lock out new readers; this could cause a deadlock if a read lock was
held on cpu 1, a write lock was then attempted on cpu 2 and was pending,
and cpu 1 was interrupted and attempted to re-acquire a read lock.
The write lock code was modified to not lock out new readers.

The second issue fixed is that there was a narrow race window where a tns
instruction had been issued (setting the lock value to "1") and the store
instruction to reset the lock value correctly had not yet been issued.
In this case, if an interrupt occurred and the same cpu then tried to
manipulate the lock, it would find the lock value set to "1" and spin
forever, assuming some other cpu was partway through updating it. The fix
is to enforce an interrupt critical section around the tns/store pair.

In addition, this change now arranges to always validate that after
a readlock we have not wrapped around the count of readers, which
is only eight bits.

Since these changes make the rwlock "fast path" code heavier weight,
I decided to move all of the rwlock code out of line, leaving only the
conventional spinlock code with fastpath inlines. Since the read_lock
and read_trylock implementations ended up very similar, I just expressed
read_lock in terms of read_trylock.

As part of this change I also eliminate support for the now-obsolete
tns_atomic mode.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

+104 -142
+7 -76
arch/tile/include/asm/spinlock_32.h
··· 78 78 #define _RD_COUNT_SHIFT 24 79 79 #define _RD_COUNT_WIDTH 8 80 80 81 - /* Internal functions; do not use. */ 82 - void arch_read_lock_slow(arch_rwlock_t *, u32); 83 - int arch_read_trylock_slow(arch_rwlock_t *); 84 - void arch_read_unlock_slow(arch_rwlock_t *); 85 - void arch_write_lock_slow(arch_rwlock_t *, u32); 86 - void arch_write_unlock_slow(arch_rwlock_t *, u32); 87 - 88 81 /** 89 82 * arch_read_can_lock() - would read_trylock() succeed? 90 83 */ ··· 97 104 /** 98 105 * arch_read_lock() - acquire a read lock. 99 106 */ 100 - static inline void arch_read_lock(arch_rwlock_t *rwlock) 101 - { 102 - u32 val = __insn_tns((int *)&rwlock->lock); 103 - if (unlikely(val << _RD_COUNT_WIDTH)) { 104 - arch_read_lock_slow(rwlock, val); 105 - return; 106 - } 107 - rwlock->lock = val + (1 << _RD_COUNT_SHIFT); 108 - } 107 + void arch_read_lock(arch_rwlock_t *rwlock); 109 108 110 109 /** 111 - * arch_read_lock() - acquire a write lock. 110 + * arch_write_lock() - acquire a write lock. 112 111 */ 113 - static inline void arch_write_lock(arch_rwlock_t *rwlock) 114 - { 115 - u32 val = __insn_tns((int *)&rwlock->lock); 116 - if (unlikely(val != 0)) { 117 - arch_write_lock_slow(rwlock, val); 118 - return; 119 - } 120 - rwlock->lock = 1 << _WR_NEXT_SHIFT; 121 - } 112 + void arch_write_lock(arch_rwlock_t *rwlock); 122 113 123 114 /** 124 115 * arch_read_trylock() - try to acquire a read lock. 125 116 */ 126 - static inline int arch_read_trylock(arch_rwlock_t *rwlock) 127 - { 128 - int locked; 129 - u32 val = __insn_tns((int *)&rwlock->lock); 130 - if (unlikely(val & 1)) 131 - return arch_read_trylock_slow(rwlock); 132 - locked = (val << _RD_COUNT_WIDTH) == 0; 133 - rwlock->lock = val + (locked << _RD_COUNT_SHIFT); 134 - return locked; 135 - } 117 + int arch_read_trylock(arch_rwlock_t *rwlock); 136 118 137 119 /** 138 120 * arch_write_trylock() - try to acquire a write lock. 
139 121 */ 140 - static inline int arch_write_trylock(arch_rwlock_t *rwlock) 141 - { 142 - u32 val = __insn_tns((int *)&rwlock->lock); 143 - 144 - /* 145 - * If a tns is in progress, or there's a waiting or active locker, 146 - * or active readers, we can't take the lock, so give up. 147 - */ 148 - if (unlikely(val != 0)) { 149 - if (!(val & 1)) 150 - rwlock->lock = val; 151 - return 0; 152 - } 153 - 154 - /* Set the "next" field to mark it locked. */ 155 - rwlock->lock = 1 << _WR_NEXT_SHIFT; 156 - return 1; 157 - } 122 + int arch_write_trylock(arch_rwlock_t *rwlock); 158 123 159 124 /** 160 125 * arch_read_unlock() - release a read lock. 161 126 */ 162 - static inline void arch_read_unlock(arch_rwlock_t *rwlock) 163 - { 164 - u32 val; 165 - mb(); /* guarantee anything modified under the lock is visible */ 166 - val = __insn_tns((int *)&rwlock->lock); 167 - if (unlikely(val & 1)) { 168 - arch_read_unlock_slow(rwlock); 169 - return; 170 - } 171 - rwlock->lock = val - (1 << _RD_COUNT_SHIFT); 172 - } 127 + void arch_read_unlock(arch_rwlock_t *rwlock); 173 128 174 129 /** 175 130 * arch_write_unlock() - release a write lock. 176 131 */ 177 - static inline void arch_write_unlock(arch_rwlock_t *rwlock) 178 - { 179 - u32 val; 180 - mb(); /* guarantee anything modified under the lock is visible */ 181 - val = __insn_tns((int *)&rwlock->lock); 182 - if (unlikely(val != (1 << _WR_NEXT_SHIFT))) { 183 - arch_write_unlock_slow(rwlock, val); 184 - return; 185 - } 186 - rwlock->lock = 0; 187 - } 132 + void arch_write_unlock(arch_rwlock_t *rwlock); 188 133 189 134 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) 190 135 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+97 -66
arch/tile/lib/spinlock_32.c
··· 15 15 #include <linux/spinlock.h> 16 16 #include <linux/module.h> 17 17 #include <asm/processor.h> 18 + #include <arch/spr_def.h> 18 19 19 20 #include "spinlock_common.h" 20 21 ··· 92 91 #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) 93 92 94 93 95 - /* Lock the word, spinning until there are no tns-ers. */ 96 - static inline u32 get_rwlock(arch_rwlock_t *rwlock) 94 + /* 95 + * We can get the read lock if everything but the reader bits (which 96 + * are in the high part of the word) is zero, i.e. no active or 97 + * waiting writers, no tns. 98 + * 99 + * We guard the tns/store-back with an interrupt critical section to 100 + * preserve the semantic that the same read lock can be acquired in an 101 + * interrupt context. 102 + */ 103 + inline int arch_read_trylock(arch_rwlock_t *rwlock) 97 104 { 98 - u32 iterations = 0; 99 - for (;;) { 100 - u32 val = __insn_tns((int *)&rwlock->lock); 101 - if (unlikely(val & 1)) { 102 - delay_backoff(iterations++); 103 - continue; 104 - } 105 - return val; 105 + u32 val; 106 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); 107 + val = __insn_tns((int *)&rwlock->lock); 108 + if (likely((val << _RD_COUNT_WIDTH) == 0)) { 109 + val += 1 << RD_COUNT_SHIFT; 110 + rwlock->lock = val; 111 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 112 + BUG_ON(val == 0); /* we don't expect wraparound */ 113 + return 1; 106 114 } 115 + if ((val & 1) == 0) 116 + rwlock->lock = val; 117 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 118 + return 0; 107 119 } 108 - 109 - int arch_read_trylock_slow(arch_rwlock_t *rwlock) 110 - { 111 - u32 val = get_rwlock(rwlock); 112 - int locked = (val << RD_COUNT_WIDTH) == 0; 113 - rwlock->lock = val + (locked << RD_COUNT_SHIFT); 114 - return locked; 115 - } 116 - EXPORT_SYMBOL(arch_read_trylock_slow); 117 - 118 - void arch_read_unlock_slow(arch_rwlock_t *rwlock) 119 - { 120 - u32 val = get_rwlock(rwlock); 121 - rwlock->lock = val - (1 << RD_COUNT_SHIFT); 122 - } 123 - 
EXPORT_SYMBOL(arch_read_unlock_slow); 124 - 125 - void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val) 126 - { 127 - u32 eq, mask = 1 << WR_CURR_SHIFT; 128 - while (unlikely(val & 1)) { 129 - /* Limited backoff since we are the highest-priority task. */ 130 - relax(4); 131 - val = __insn_tns((int *)&rwlock->lock); 132 - } 133 - val = __insn_addb(val, mask); 134 - eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); 135 - val = __insn_mz(eq & mask, val); 136 - rwlock->lock = val; 137 - } 138 - EXPORT_SYMBOL(arch_write_unlock_slow); 120 + EXPORT_SYMBOL(arch_read_trylock); 139 121 140 122 /* 141 - * We spin until everything but the reader bits (which are in the high 142 - * part of the word) are zero, i.e. no active or waiting writers, no tns. 143 - * 123 + * Spin doing arch_read_trylock() until we acquire the lock. 144 124 * ISSUE: This approach can permanently starve readers. A reader who sees 145 125 * a writer could instead take a ticket lock (just like a writer would), 146 126 * and atomically enter read mode (with 1 reader) when it gets the ticket. 147 - * This way both readers and writers will always make forward progress 127 + * This way both readers and writers would always make forward progress 148 128 * in a finite time. 
149 129 */ 150 - void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) 130 + void arch_read_lock(arch_rwlock_t *rwlock) 151 131 { 152 132 u32 iterations = 0; 153 - do { 154 - if (!(val & 1)) 155 - rwlock->lock = val; 133 + while (unlikely(!arch_read_trylock(rwlock))) 156 134 delay_backoff(iterations++); 157 - val = __insn_tns((int *)&rwlock->lock); 158 - } while ((val << RD_COUNT_WIDTH) != 0); 159 - rwlock->lock = val + (1 << RD_COUNT_SHIFT); 160 135 } 161 - EXPORT_SYMBOL(arch_read_lock_slow); 136 + EXPORT_SYMBOL(arch_read_lock); 162 137 163 - void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) 138 + void arch_read_unlock(arch_rwlock_t *rwlock) 139 + { 140 + u32 val, iterations = 0; 141 + 142 + mb(); /* guarantee anything modified under the lock is visible */ 143 + for (;;) { 144 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); 145 + val = __insn_tns((int *)&rwlock->lock); 146 + if (likely(val & 1) == 0) { 147 + rwlock->lock = val - (1 << _RD_COUNT_SHIFT); 148 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 149 + break; 150 + } 151 + __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 152 + delay_backoff(iterations++); 153 + } 154 + } 155 + EXPORT_SYMBOL(arch_read_unlock); 156 + 157 + /* 158 + * We don't need an interrupt critical section here (unlike for 159 + * arch_read_lock) since we should never use a bare write lock where 160 + * it could be interrupted by code that could try to re-acquire it. 
161 + */ 162 + void arch_write_lock(arch_rwlock_t *rwlock) 164 163 { 165 164 /* 166 165 * The trailing underscore on this variable (and curr_ below) ··· 169 168 */ 170 169 u32 my_ticket_; 171 170 u32 iterations = 0; 171 + u32 val = __insn_tns((int *)&rwlock->lock); 172 + 173 + if (likely(val == 0)) { 174 + rwlock->lock = 1 << _WR_NEXT_SHIFT; 175 + return; 176 + } 172 177 173 178 /* 174 179 * Wait until there are no readers, then bump up the next ··· 213 206 relax(4); 214 207 } 215 208 } 216 - EXPORT_SYMBOL(arch_write_lock_slow); 209 + EXPORT_SYMBOL(arch_write_lock); 217 210 218 - int __tns_atomic_acquire(atomic_t *lock) 211 + int arch_write_trylock(arch_rwlock_t *rwlock) 219 212 { 220 - int ret; 221 - u32 iterations = 0; 213 + u32 val = __insn_tns((int *)&rwlock->lock); 222 214 223 - BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); 224 - __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); 215 + /* 216 + * If a tns is in progress, or there's a waiting or active locker, 217 + * or active readers, we can't take the lock, so give up. 218 + */ 219 + if (unlikely(val != 0)) { 220 + if (!(val & 1)) 221 + rwlock->lock = val; 222 + return 0; 223 + } 225 224 226 - while ((ret = __insn_tns((void *)&lock->counter)) == 1) 227 - delay_backoff(iterations++); 228 - return ret; 225 + /* Set the "next" field to mark it locked. */ 226 + rwlock->lock = 1 << _WR_NEXT_SHIFT; 227 + return 1; 229 228 } 229 + EXPORT_SYMBOL(arch_write_trylock); 230 230 231 - void __tns_atomic_release(atomic_t *p, int v) 231 + void arch_write_unlock(arch_rwlock_t *rwlock) 232 232 { 233 - p->counter = v; 234 - __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 233 + u32 val, eq, mask; 234 + 235 + mb(); /* guarantee anything modified under the lock is visible */ 236 + val = __insn_tns((int *)&rwlock->lock); 237 + if (likely(val == (1 << _WR_NEXT_SHIFT))) { 238 + rwlock->lock = 0; 239 + return; 240 + } 241 + while (unlikely(val & 1)) { 242 + /* Limited backoff since we are the highest-priority task. 
*/ 243 + relax(4); 244 + val = __insn_tns((int *)&rwlock->lock); 245 + } 246 + mask = 1 << WR_CURR_SHIFT; 247 + val = __insn_addb(val, mask); 248 + eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); 249 + val = __insn_mz(eq & mask, val); 250 + rwlock->lock = val; 235 251 } 252 + EXPORT_SYMBOL(arch_write_unlock);