/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	sem->count = RWSEM_UNLOCKED_VALUE;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
}

EXPORT_SYMBOL(__init_rwsem);

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	unsigned int flags;
#define RWSEM_WAITING_FOR_READ	0x00000001
#define RWSEM_WAITING_FOR_WRITE	0x00000002
};

/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
 * since the rwsem value was observed.
 */
#define RWSEM_WAKE_ANY		0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE	1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED	2 /* rwsem was observed to be read owned */

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 *   - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
	struct rwsem_waiter *waiter;
	struct task_struct *tsk;
	struct list_head *next;
	signed long woken, loop, adjustment;

	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
		goto readers_only;

	if (wake_type == RWSEM_WAKE_READ_OWNED)
		/* Another active reader was observed, so wakeup is not
		 * likely to succeed. Save the atomic op.
		 */
		goto out;

	/* Wake up the writing waiter and let the task grab the sem: */
	wake_up_process(waiter->task);
	goto out;

 readers_only:
	/* If we come here from up_xxxx(), another thread might have reached
	 * rwsem_down_failed_common() before we acquired the spinlock and
	 * woken up a waiter, making it now active. We prefer to check for
	 * this first in order to not spend too much time with the spinlock
	 * held if we're not going to be able to wake up readers in the end.
	 *
	 * Note that we do not need to update the rwsem count: any writer
	 * trying to acquire rwsem will run rwsem_down_write_failed() due
	 * to the waiting threads and block trying to acquire the spinlock.
	 *
	 * We use a dummy atomic update in order to acquire the cache line
	 * exclusively since we expect to succeed and run the final rwsem
	 * count adjustment pretty soon.
	 */
	if (wake_type == RWSEM_WAKE_ANY &&
	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
		/* Someone grabbed the sem for write already */
		goto out;

	/* Grant an infinite number of read locks to the readers at the front
	 * of the queue. Note we increment the 'active part' of the count by
	 * the number of readers before waking any processes up.
	 */
	woken = 0;
	do {
		woken++;

		if (waiter->list.next == &sem->wait_list)
			break;

		waiter = list_entry(waiter->list.next,
					struct rwsem_waiter, list);

	} while (waiter->flags & RWSEM_WAITING_FOR_READ);

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
	if (waiter->flags & RWSEM_WAITING_FOR_READ)
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;

	rwsem_atomic_add(adjustment, sem);

	next = sem->wait_list.next;
	for (loop = woken; loop > 0; loop--) {
		waiter = list_entry(next, struct rwsem_waiter, list);
		next = waiter->list.next;
		tsk = waiter->task;
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tsk);
	}

	sem->wait_list.next = next;
	next->prev = &sem->wait_list;

 out:
	return sem;
}
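
/*
 * Worked example of the readers_only adjustment above.  Illustrative only,
 * assuming the 32-bit count layout described before __rwsem_do_wake(), i.e.
 * RWSEM_ACTIVE_READ_BIAS == 0x00000001 and RWSEM_WAITING_BIAS == -0x00010000:
 * after an up_write() with three readers queued and nothing else active,
 * count == RWSEM_WAITING_BIAS (0xffff0000).  The loop above counts woken = 3
 * and hits the end of the list, so adjustment = 3 * 1 - (-0x10000) = 0x10003,
 * and the count becomes 0x00000003: three active readers, waiting bias gone.
 */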

/* Try to get write sem, caller holds sem->wait_lock: */
static int try_get_writer_sem(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter)
{
	struct rwsem_waiter *fwaiter;
	long oldcount, adjustment;

	/* only steal when first waiter is writing */
	fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
	if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
		return 0;

	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
	/* Only one waiter in the queue: */
	if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
		adjustment -= RWSEM_WAITING_BIAS;

try_again_write:
	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
	if (!(oldcount & RWSEM_ACTIVE_MASK)) {
		/* No active lock: */
		struct task_struct *tsk = waiter->task;

		list_del(&waiter->list);
		smp_mb();
		put_task_struct(tsk);
		tsk->state = TASK_RUNNING;
		return 1;
	}
	/* some one grabbed the sem already */
	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
		return 0;
	goto try_again_write;
}

/*
 * wait for a lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
			 unsigned int flags, signed long adjustment)
{
	struct rwsem_waiter waiter;
	struct task_struct *tsk = current;
	signed long count;

	set_task_state(tsk, TASK_UNINTERRUPTIBLE);

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);
	waiter.task = tsk;
	waiter.flags = flags;
	get_task_struct(tsk);

	if (list_empty(&sem->wait_list))
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = rwsem_atomic_update(adjustment, sem);

	/* If there are no active locks, wake the front queued process(es) up.
	 *
	 * Alternatively, if we're called from a failed down_write(), there
	 * were already threads queued before us and there are no active
	 * writers, the lock must be read owned; so we try to wake any read
	 * locks that were queued ahead of us. */
	if (count == RWSEM_WAITING_BIAS)
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
	else if (count > RWSEM_WAITING_BIAS &&
			adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		if (!waiter.task)
			break;

		raw_spin_lock_irq(&sem->wait_lock);
		/* Try to get the writer sem, may steal from the head writer: */
		if (flags == RWSEM_WAITING_FOR_WRITE)
			if (try_get_writer_sem(sem, &waiter)) {
				raw_spin_unlock_irq(&sem->wait_lock);
				return sem;
			}
		raw_spin_unlock_irq(&sem->wait_lock);
		schedule();
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
	}

	tsk->state = TASK_RUNNING;

	return sem;
}
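
/*
 * Worked example of the slow path above.  Illustrative only, again assuming
 * the 32-bit layout (RWSEM_ACTIVE_READ_BIAS == 1, RWSEM_WAITING_BIAS ==
 * -0x10000, RWSEM_ACTIVE_WRITE_BIAS == -0xffff): a reader hitting a
 * write-held sem (count == 0xffff0001) has already added 1 in the fast path,
 * giving 0xffff0002.  With an empty wait list its adjustment becomes
 * -1 + RWSEM_WAITING_BIAS = -0x10001, so the count ends up as 0xfffe0001,
 * i.e. "writer active, waiters queued".  That is neither equal to nor
 * greater than RWSEM_WAITING_BIAS, so no wakeup is attempted and the reader
 * sleeps until up_write() eventually drives rwsem_wake().
 */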

/*
 * wait for the read lock to be granted
 */
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
					-RWSEM_ACTIVE_READ_BIAS);
}

/*
 * wait for the write lock to be granted
 */
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
					-RWSEM_ACTIVE_WRITE_BIAS);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);
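
/*
 * For reference only: a minimal sketch of how an architecture fast path hands
 * off to the slow-path functions in this file, kept inside a comment because
 * the real definitions live in per-arch headers.  It is loosely modelled on
 * the asm-generic/rwsem.h approach; the exact helper bodies and bias handling
 * shown here are assumptions for illustration, not part of this file:
 *
 *	static inline void __down_read(struct rw_semaphore *sem)
 *	{
 *		// fast path: one more active reader; a non-positive result
 *		// means a writer is active or waiters are queued
 *		if (unlikely(rwsem_atomic_update(RWSEM_ACTIVE_READ_BIAS, sem) <= 0))
 *			rwsem_down_read_failed(sem);
 *	}
 *
 *	static inline void __up_write(struct rw_semaphore *sem)
 *	{
 *		// fast path: drop the write bias; a negative result means
 *		// the waiting part is set, so hand off to rwsem_wake()
 *		if (unlikely(rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem) < 0))
 *			rwsem_wake(sem);
 *	}
 */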