Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core locking changes from Ingo Molnar:
"The biggest change is the rwsem lock-steal improvements, both to the
assembly optimized and the spinlock based variants.

The other notable change is the clean up of the seqlock implementation
to be based on the seqcount infrastructure.

The rest is assorted smaller debuggability, cleanup and continued -rt
locking changes."

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
rwsem-spinlock: Implement writer lock-stealing for better scalability
futex: Revert "futex: Mark get_robust_list as deprecated"
generic: Use raw local irq variant for generic cmpxchg
lockdep: Selftest: convert spinlock to raw spinlock
seqlock: Use seqcount infrastructure
seqlock: Remove unused functions
ntp: Make ntp_lock raw
intel_idle: Convert i7300_idle_lock to raw_spinlock
locking: Various static lock initializer fixes
lockdep: Print more info when MAX_LOCK_DEPTH is exceeded
rwsem: Implement writer lock-stealing for better scalability
lockdep: Silence warning if CONFIG_LOCKDEP isn't set
watchdog: Use local_clock for get_timestamp()
lockdep: Rename print_unlock_inbalance_bug() to print_unlock_imbalance_bug()
locking/stat: Fix a typo
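For reference, the seqlock API whose implementation the seqcount rework below reshuffles keeps its usual read-retry usage pattern. A minimal sketch (the lock and data names are illustrative, not taken from this merge):

        static DEFINE_SEQLOCK(my_lock);
        static u64 my_data;

        /* Readers loop until they observe an even, unchanged sequence count. */
        static u64 read_my_data(void)
        {
                u64 val;
                unsigned seq;

                do {
                        seq = read_seqbegin(&my_lock);
                        val = my_data;
                } while (read_seqretry(&my_lock, seq));

                return val;
        }

        /* Writers serialize against each other via the lock embedded in seqlock_t. */
        static void write_my_data(u64 val)
        {
                write_seqlock(&my_lock);
                my_data = val;
                write_sequnlock(&my_lock);
        }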

+222 -238
+1 -1
Documentation/lockstat.txt
···
 
 - CONFIGURATION
 
-Lock statistics are enabled via CONFIG_LOCK_STATS.
+Lock statistics are enabled via CONFIG_LOCK_STAT.
 
 - USAGE
 
+3 -3
drivers/char/random.c
···
         .poolinfo = &poolinfo_table[0],
         .name = "input",
         .limit = 1,
-        .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
         .pool = input_pool_data
 };
 
···
         .name = "blocking",
         .limit = 1,
         .pull = &input_pool,
-        .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
         .pool = blocking_pool_data
 };
 
···
         .poolinfo = &poolinfo_table[1],
         .name = "nonblocking",
         .pull = &input_pool,
-        .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
         .pool = nonblocking_pool_data
 };
 
+4 -4
drivers/idle/i7300_idle.c
···
 
 static struct pci_dev *fbd_dev;
 
-static spinlock_t i7300_idle_lock;
+static raw_spinlock_t i7300_idle_lock;
 static int i7300_idle_active;
 
 static u8 i7300_idle_thrtctl_saved;
···
                 idle_begin_time = ktime_get();
         }
 
-        spin_lock_irqsave(&i7300_idle_lock, flags);
+        raw_spin_lock_irqsave(&i7300_idle_lock, flags);
         if (val == IDLE_START) {
 
                 cpumask_set_cpu(smp_processor_id(), idle_cpumask);
···
                 }
         }
 end:
-        spin_unlock_irqrestore(&i7300_idle_lock, flags);
+        raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
         return 0;
 }
 
···
 
 static int __init i7300_idle_init(void)
 {
-        spin_lock_init(&i7300_idle_lock);
+        raw_spin_lock_init(&i7300_idle_lock);
         total_us = 0;
 
         if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
+1 -1
drivers/usb/chipidea/debug.c
···
 } dbg_data = {
         .idx = 0,
         .tty = 0,
-        .lck = __RW_LOCK_UNLOCKED(lck)
+        .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
 };
 
 /**
+1 -1
fs/file.c
···
                 .close_on_exec  = init_files.close_on_exec_init,
                 .open_fds       = init_files.open_fds_init,
         },
-        .file_lock      = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+        .file_lock      = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
 /*
+4 -4
include/asm-generic/cmpxchg-local.h
···
         if (size == 8 && sizeof(unsigned long) != 8)
                 wrong_size_cmpxchg(ptr);
 
-        local_irq_save(flags);
+        raw_local_irq_save(flags);
         switch (size) {
         case 1: prev = *(u8 *)ptr;
                 if (prev == old)
···
         default:
                 wrong_size_cmpxchg(ptr);
         }
-        local_irq_restore(flags);
+        raw_local_irq_restore(flags);
         return prev;
 }
 
···
         u64 prev;
         unsigned long flags;
 
-        local_irq_save(flags);
+        raw_local_irq_save(flags);
         prev = *(u64 *)ptr;
         if (prev == old)
                 *(u64 *)ptr = new;
-        local_irq_restore(flags);
+        raw_local_irq_restore(flags);
         return prev;
 }
 
+1 -1
include/linux/idr.h
···
         struct ida_bitmap       *free_bitmap;
 };
 
-#define IDA_INIT(name)          { .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define IDA_INIT(name)          { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
 #define DEFINE_IDA(name)        struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
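The random.c, chipidea/debug.c, fs/file.c and idr.h hunks above are the "Various static lock initializer fixes": the name handed to initializers such as __SPIN_LOCK_UNLOCKED() or IDR_INIT() is only stringified into the lock's lockdep/debug name, so it should be the lock variable being initialized, not a pointer to it or an unrelated symbol. A minimal sketch of the corrected pattern ("my_pool" is illustrative, not from this merge):

        static struct {
                spinlock_t lock;
                int count;
        } my_pool = {
                /* pass the lock's name, not &my_pool.lock */
                .lock = __SPIN_LOCK_UNLOCKED(my_pool.lock),
                .count = 0,
        };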
+1 -1
include/linux/lockdep.h
···
 
 #define lockdep_depth(tsk)      (0)
 
-#define lockdep_assert_held(l)                  do { } while (0)
+#define lockdep_assert_held(l)                  do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)                  (0)
 
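With CONFIG_LOCKDEP disabled, the old empty definition made any variable that exists only to be passed to lockdep_assert_held() look unused to the compiler; evaluating (void)(l) silences that warning without generating any code. A hedged illustration (the struct and function are made up for this example):

        struct my_ctx {
                spinlock_t lock;
        };

        static void my_check(struct my_ctx *ctx)
        {
                spinlock_t *l = &ctx->lock;     /* only used by the assertion */

                lockdep_assert_held(l);         /* no "unused variable" warning with CONFIG_LOCKDEP=n */
        }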
+92 -103
include/linux/seqlock.h
···
 #include <linux/preempt.h>
 #include <asm/processor.h>
 
-typedef struct {
-        unsigned sequence;
-        spinlock_t lock;
-} seqlock_t;
-
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
-#define __SEQLOCK_UNLOCKED(lockname) \
-                { 0, __SPIN_LOCK_UNLOCKED(lockname) }
-
-#define seqlock_init(x)                                 \
-        do {                                            \
-                (x)->sequence = 0;                      \
-                spin_lock_init(&(x)->lock);             \
-        } while (0)
-
-#define DEFINE_SEQLOCK(x) \
-                seqlock_t x = __SEQLOCK_UNLOCKED(x)
-
-/* Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
- */
-static inline void write_seqlock(seqlock_t *sl)
-{
-        spin_lock(&sl->lock);
-        ++sl->sequence;
-        smp_wmb();
-}
-
-static inline void write_sequnlock(seqlock_t *sl)
-{
-        smp_wmb();
-        sl->sequence++;
-        spin_unlock(&sl->lock);
-}
-
-static inline int write_tryseqlock(seqlock_t *sl)
-{
-        int ret = spin_trylock(&sl->lock);
-
-        if (ret) {
-                ++sl->sequence;
-                smp_wmb();
-        }
-        return ret;
-}
-
-/* Start of read calculation -- fetch last complete writer token */
-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
-{
-        unsigned ret;
-
-repeat:
-        ret = ACCESS_ONCE(sl->sequence);
-        if (unlikely(ret & 1)) {
-                cpu_relax();
-                goto repeat;
-        }
-        smp_rmb();
-
-        return ret;
-}
-
-/*
- * Test if reader processed invalid data.
- *
- * If sequence value changed then writer changed data while in section.
- */
-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
-{
-        smp_rmb();
-
-        return unlikely(sl->sequence != start);
-}
-
-
 /*
  * Version using sequence counter only.
  * This can be used when code has its own mutex protecting the
  * updating starting before the write_seqcountbeqin() and ending
  * after the write_seqcount_end().
  */
-
 typedef struct seqcount {
         unsigned sequence;
 } seqcount_t;
···
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
         smp_rmb();
-
         return __read_seqcount_retry(s, start);
 }
 
···
         s->sequence+=2;
 }
 
+typedef struct {
+        struct seqcount seqcount;
+        spinlock_t lock;
+} seqlock_t;
+
 /*
- * Possible sw/hw IRQ protected versions of the interfaces.
+ * These macros triggered gcc-3.x compile-time problems.  We think these are
+ * OK now.  Be cautious.
  */
+#define __SEQLOCK_UNLOCKED(lockname)                    \
+        {                                               \
+                .seqcount = SEQCNT_ZERO,                \
+                .lock = __SPIN_LOCK_UNLOCKED(lockname)  \
+        }
+
+#define seqlock_init(x)                                 \
+        do {                                            \
+                seqcount_init(&(x)->seqcount);          \
+                spin_lock_init(&(x)->lock);             \
+        } while (0)
+
+#define DEFINE_SEQLOCK(x) \
+                seqlock_t x = __SEQLOCK_UNLOCKED(x)
+
+/*
+ * Read side functions for starting and finalizing a read side section.
+ */
+static inline unsigned read_seqbegin(const seqlock_t *sl)
+{
+        return read_seqcount_begin(&sl->seqcount);
+}
+
+static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
+{
+        return read_seqcount_retry(&sl->seqcount, start);
+}
+
+/*
+ * Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
+ */
+static inline void write_seqlock(seqlock_t *sl)
+{
+        spin_lock(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock(&sl->lock);
+}
+
+static inline void write_seqlock_bh(seqlock_t *sl)
+{
+        spin_lock_bh(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_bh(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_bh(&sl->lock);
+}
+
+static inline void write_seqlock_irq(seqlock_t *sl)
+{
+        spin_lock_irq(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_irq(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&sl->lock, flags);
+        write_seqcount_begin(&sl->seqcount);
+        return flags;
+}
+
 #define write_seqlock_irqsave(lock, flags)                              \
-        do { local_irq_save(flags); write_seqlock(lock); } while (0)
-#define write_seqlock_irq(lock)                                         \
-        do { local_irq_disable();   write_seqlock(lock); } while (0)
-#define write_seqlock_bh(lock)                                          \
-        do { local_bh_disable();    write_seqlock(lock); } while (0)
+        do { flags = __write_seqlock_irqsave(lock); } while (0)
 
-#define write_sequnlock_irqrestore(lock, flags)                         \
-        do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
-#define write_sequnlock_irq(lock)                                       \
-        do { write_sequnlock(lock); local_irq_enable(); } while(0)
-#define write_sequnlock_bh(lock)                                        \
-        do { write_sequnlock(lock); local_bh_enable(); } while(0)
-
-#define read_seqbegin_irqsave(lock, flags)                              \
-        ({ local_irq_save(flags);   read_seqbegin(lock); })
-
-#define read_seqretry_irqrestore(lock, iv, flags)                       \
-        ({                                                              \
-                int ret = read_seqretry(lock, iv);                      \
-                local_irq_restore(flags);                               \
-                ret;                                                    \
-        })
+static inline void
+write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_irqrestore(&sl->lock, flags);
+}
 
 #endif /* __LINUX_SEQLOCK_H */
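With the conversion above, write_seqlock_irqsave() stays a macro only so it can assign the flags returned by the new __write_seqlock_irqsave() helper; callers are unchanged. A short usage sketch reusing the illustrative names from the earlier example:

        unsigned long flags;

        write_seqlock_irqsave(&my_lock, flags);
        my_data = val;
        write_sequnlock_irqrestore(&my_lock, flags);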
-2
kernel/futex.c
···
         if (!futex_cmpxchg_enabled)
                 return -ENOSYS;
 
-        WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
         rcu_read_lock();
 
         ret = -ESRCH;
-2
kernel/futex_compat.c
···
         if (!futex_cmpxchg_enabled)
                 return -ENOSYS;
 
-        WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
         rcu_read_lock();
 
         ret = -ESRCH;
+10 -5
kernel/lockdep.c
···
 #endif
         if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
                 debug_locks_off();
-                printk("BUG: MAX_LOCK_DEPTH too low!\n");
+                printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
+                       curr->lockdep_depth, MAX_LOCK_DEPTH);
                 printk("turning off the locking correctness validator.\n");
+
+                lockdep_print_held_locks(current);
+                debug_show_all_locks();
                 dump_stack();
+
                 return 0;
         }
···
 }
 
 static int
-print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
                            unsigned long ip)
 {
         if (!debug_locks_off())
···
                 return 0;
 
         if (curr->lockdep_depth <= 0)
-                return print_unlock_inbalance_bug(curr, lock, ip);
+                return print_unlock_imbalance_bug(curr, lock, ip);
 
         return 1;
 }
···
                         goto found_it;
                 prev_hlock = hlock;
         }
-        return print_unlock_inbalance_bug(curr, lock, ip);
+        return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
         lockdep_init_map(lock, name, key, 0);
···
                         goto found_it;
                 prev_hlock = hlock;
         }
-        return print_unlock_inbalance_bug(curr, lock, ip);
+        return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
         if (hlock->instance == lock)
+13 -13
kernel/time/ntp.c
···
  * NTP timekeeping variables:
  */
 
-DEFINE_SPINLOCK(ntp_lock);
+DEFINE_RAW_SPINLOCK(ntp_lock);
 
 
 /* USER_HZ period (usecs): */
···
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         time_adjust = 0;                /* stop active adjtime() */
         time_status |= STA_UNSYNC;
···
 
         /* Clear PPS state variables */
         pps_clear();
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
 }
···
         unsigned long flags;
         s64 ret;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
         ret = tick_length;
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
         return ret;
 }
 
···
         int leap = 0;
         unsigned long flags;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         /*
          * Leap second processing. If in leap-insert state at the end of the
···
                 time_adjust = 0;
 
 out:
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
         return leap;
 }
···
 
         getnstimeofday(&ts);
 
-        spin_lock_irq(&ntp_lock);
+        raw_spin_lock_irq(&ntp_lock);
 
         if (txc->modes & ADJ_ADJTIME) {
                 long save_adjust = time_adjust;
···
         /* fill PPS status fields */
         pps_fill_timex(txc);
 
-        spin_unlock_irq(&ntp_lock);
+        raw_spin_unlock_irq(&ntp_lock);
 
         txc->time.tv_sec = ts.tv_sec;
         txc->time.tv_usec = ts.tv_nsec;
···
 
         pts_norm = pps_normalize_ts(*phase_ts);
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         /* clear the error bits, they will be set again if needed */
         time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
···
          * just start the frequency interval */
         if (unlikely(pps_fbase.tv_sec == 0)) {
                 pps_fbase = *raw_ts;
-                spin_unlock_irqrestore(&ntp_lock, flags);
+                raw_spin_unlock_irqrestore(&ntp_lock, flags);
                 return;
         }
 
···
                 time_status |= STA_PPSJITTER;
                 /* restart the frequency calibration interval */
                 pps_fbase = *raw_ts;
-                spin_unlock_irqrestore(&ntp_lock, flags);
+                raw_spin_unlock_irqrestore(&ntp_lock, flags);
                 pr_err("hardpps: PPSJITTER: bad pulse\n");
                 return;
         }
···
 
         hardpps_update_phase(pts_norm.nsec);
 
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 }
 EXPORT_SYMBOL(hardpps);
 
+4 -6
kernel/watchdog.c
···
  * resolution, and we don't need to waste time with a big divide when
  * 2^30ns == 1.074s.
  */
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
 {
-        return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+        return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
···
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-        int this_cpu = smp_processor_id();
-
-        __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
+        __this_cpu_write(watchdog_touch_ts, get_timestamp());
 }
 
 void touch_softlockup_watchdog(void)
···
 
 static int is_softlockup(unsigned long touch_ts)
 {
-        unsigned long now = get_timestamp(smp_processor_id());
+        unsigned long now = get_timestamp();
 
         /* Warn about unreasonable delays: */
         if (time_after(now, touch_ts + get_softlockup_thresh()))
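The shift in get_timestamp() is a cheap stand-in for dividing nanoseconds by 10^9: 2^30 ns is about 1.074 s, so the result is "roughly seconds", which is all the watchdog needs. A quick worked example with made-up values:

        u64 ns = 5000000000ULL;         /* 5 seconds worth of nanoseconds */
        unsigned long ts = ns >> 30;    /* 5000000000 / 2^30 = 4: coarse, but monotonic and cheap */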
+17 -17
lib/locking-selftest.c
···
  * Normal standalone locks, for the circular and irq-context
  * dependency tests:
  */
-static DEFINE_SPINLOCK(lock_A);
-static DEFINE_SPINLOCK(lock_B);
-static DEFINE_SPINLOCK(lock_C);
-static DEFINE_SPINLOCK(lock_D);
+static DEFINE_RAW_SPINLOCK(lock_A);
+static DEFINE_RAW_SPINLOCK(lock_B);
+static DEFINE_RAW_SPINLOCK(lock_C);
+static DEFINE_RAW_SPINLOCK(lock_D);
 
 static DEFINE_RWLOCK(rwlock_A);
 static DEFINE_RWLOCK(rwlock_B);
···
  * but X* and Y* are different classes. We do this so that
  * we do not trigger a real lockup:
  */
-static DEFINE_SPINLOCK(lock_X1);
-static DEFINE_SPINLOCK(lock_X2);
-static DEFINE_SPINLOCK(lock_Y1);
-static DEFINE_SPINLOCK(lock_Y2);
-static DEFINE_SPINLOCK(lock_Z1);
-static DEFINE_SPINLOCK(lock_Z2);
+static DEFINE_RAW_SPINLOCK(lock_X1);
+static DEFINE_RAW_SPINLOCK(lock_X2);
+static DEFINE_RAW_SPINLOCK(lock_Y1);
+static DEFINE_RAW_SPINLOCK(lock_Y2);
+static DEFINE_RAW_SPINLOCK(lock_Z1);
+static DEFINE_RAW_SPINLOCK(lock_Z2);
 
 static DEFINE_RWLOCK(rwlock_X1);
 static DEFINE_RWLOCK(rwlock_X2);
···
  */
 #define INIT_CLASS_FUNC(class)                          \
 static noinline void                                    \
-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
-                 struct rw_semaphore *rwsem)            \
+init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
+        struct mutex *mutex, struct rw_semaphore *rwsem)\
 {                                                       \
-        spin_lock_init(lock);                           \
+        raw_spin_lock_init(lock);                       \
         rwlock_init(rwlock);                            \
         mutex_init(mutex);                              \
         init_rwsem(rwsem);                              \
···
  * Shortcuts for lock/unlock API variants, to keep
  * the testcases compact:
  */
-#define L(x)                    spin_lock(&lock_##x)
-#define U(x)                    spin_unlock(&lock_##x)
+#define L(x)                    raw_spin_lock(&lock_##x)
+#define U(x)                    raw_spin_unlock(&lock_##x)
 #define LU(x)                   L(x); U(x)
-#define SI(x)                   spin_lock_init(&lock_##x)
+#define SI(x)                   raw_spin_lock_init(&lock_##x)
 
 #define WL(x)                   write_lock(&rwlock_##x)
 #define WU(x)                   write_unlock(&rwlock_##x)
···
 
 #define I2(x)                                   \
         do {                                    \
-                spin_lock_init(&lock_##x);      \
+                raw_spin_lock_init(&lock_##x);  \
                 rwlock_init(&rwlock_##x);       \
                 mutex_init(&mutex_##x);         \
                 init_rwsem(&rwsem_##x);         \
+24 -45
lib/rwsem-spinlock.c
···
                 goto dont_wake_writers;
         }
 
-        /* if we are allowed to wake writers try to grant a single write lock
-         * if there's a writer at the front of the queue
-         * - we leave the 'waiting count' incremented to signify potential
-         *   contention
+        /*
+         * as we support write lock stealing, we can't set sem->activity
+         * to -1 here to indicate we get the lock. Instead, we wake it up
+         * to let it go get it again.
          */
         if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
-                sem->activity = -1;
-                list_del(&waiter->list);
-                tsk = waiter->task;
-                /* Don't touch waiter after ->task has been NULLed */
-                smp_mb();
-                waiter->task = NULL;
-                wake_up_process(tsk);
-                put_task_struct(tsk);
+                wake_up_process(waiter->task);
                 goto out;
         }
···
 __rwsem_wake_one_writer(struct rw_semaphore *sem)
 {
         struct rwsem_waiter *waiter;
-        struct task_struct *tsk;
-
-        sem->activity = -1;
 
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-        list_del(&waiter->list);
+        wake_up_process(waiter->task);
 
-        tsk = waiter->task;
-        smp_mb();
-        waiter->task = NULL;
-        wake_up_process(tsk);
-        put_task_struct(tsk);
         return sem;
 }
···
 
 /*
  * get a write lock on the semaphore
- * - we increment the waiting count anyway to indicate an exclusive lock
  */
 void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
···
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-                /* granted */
-                sem->activity = -1;
-                raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-                goto out;
-        }
-
-        tsk = current;
-        set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
         /* set up my own style of waitqueue */
+        tsk = current;
         waiter.task = tsk;
         waiter.flags = RWSEM_WAITING_FOR_WRITE;
-        get_task_struct(tsk);
-
         list_add_tail(&waiter.list, &sem->wait_list);
 
-        /* we don't need to touch the semaphore struct anymore */
-        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-        /* wait to be given the lock */
+        /* wait for someone to release the lock */
         for (;;) {
-                if (!waiter.task)
+                /*
+                 * That is the key to support write lock stealing: allows the
+                 * task already on CPU to get the lock soon rather than put
+                 * itself into sleep and waiting for system woke it or someone
+                 * else in the head of the wait list up.
+                 */
+                if (sem->activity == 0)
                         break;
-                schedule();
                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+                raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+                schedule();
+                raw_spin_lock_irqsave(&sem->wait_lock, flags);
         }
+        /* got the lock */
+        sem->activity = -1;
+        list_del(&waiter.list);
 
-        tsk->state = TASK_RUNNING;
- out:
-        ;
+        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
···
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-                /* granted */
+        if (sem->activity == 0) {
+                /* got the lock */
                 sem->activity = -1;
                 ret = 1;
         }
+46 -29
lib/rwsem.c
···
  *
  * Written by David Howells (dhowells@redhat.com).
  * Derived from arch/i386/kernel/semaphore.c
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
···
         struct rwsem_waiter *waiter;
         struct task_struct *tsk;
         struct list_head *next;
-        signed long oldcount, woken, loop, adjustment;
+        signed long woken, loop, adjustment;
 
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
···
          */
                 goto out;
 
-        /* There's a writer at the front of the queue - try to grant it the
-         * write lock. However, we only wake this writer if we can transition
-         * the active part of the count from 0 -> 1
-         */
-        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
-        if (waiter->list.next == &sem->wait_list)
-                adjustment -= RWSEM_WAITING_BIAS;
-
- try_again_write:
-        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
-        if (oldcount & RWSEM_ACTIVE_MASK)
-                /* Someone grabbed the sem already */
-                goto undo_write;
-
-        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
-         * It is an allocated on the waiter's stack and may become invalid at
-         * any time after that point (due to a wakeup from another source).
-         */
-        list_del(&waiter->list);
-        tsk = waiter->task;
-        smp_mb();
-        waiter->task = NULL;
-        wake_up_process(tsk);
-        put_task_struct(tsk);
+        /* Wake up the writing waiter and let the task grab the sem: */
+        wake_up_process(waiter->task);
         goto out;
 
  readers_only:
···
 
  out:
         return sem;
+}
 
-        /* undo the change to the active count, but check for a transition
-         * 1->0 */
- undo_write:
+/* Try to get write sem, caller holds sem->wait_lock: */
+static int try_get_writer_sem(struct rw_semaphore *sem,
+                                        struct rwsem_waiter *waiter)
+{
+        struct rwsem_waiter *fwaiter;
+        long oldcount, adjustment;
+
+        /* only steal when first waiter is writing */
+        fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+        if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
+                return 0;
+
+        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+        /* Only one waiter in the queue: */
+        if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
+                adjustment -= RWSEM_WAITING_BIAS;
+
+try_again_write:
+        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+        if (!(oldcount & RWSEM_ACTIVE_MASK)) {
+                /* No active lock: */
+                struct task_struct *tsk = waiter->task;
+
+                list_del(&waiter->list);
+                smp_mb();
+                put_task_struct(tsk);
+                tsk->state = TASK_RUNNING;
+                return 1;
+        }
+        /* some one grabbed the sem already */
         if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
-                goto out;
+                return 0;
         goto try_again_write;
 }
 
···
         for (;;) {
                 if (!waiter.task)
                         break;
+
+                raw_spin_lock_irq(&sem->wait_lock);
+                /* Try to get the writer sem, may steal from the head writer: */
+                if (flags == RWSEM_WAITING_FOR_WRITE)
+                        if (try_get_writer_sem(sem, &waiter)) {
+                                raw_spin_unlock_irq(&sem->wait_lock);
+                                return sem;
+                        }
+                raw_spin_unlock_irq(&sem->wait_lock);
                 schedule();
                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
         }
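The core idea of the lock-stealing changes above is that the releasing task no longer hands the semaphore to the writer at the head of the queue; the woken writer (or a writer that is already running) competes for it again itself. A simplified user-space model of that "wake, don't hand off" step, using C11 atomics rather than the kernel's rwsem internals:

        #include <stdatomic.h>
        #include <stdbool.h>

        /* 0: free, -1: write-locked -- mirrors sem->activity in rwsem-spinlock.c */
        static atomic_int activity;

        /* A woken (or newly arriving) writer tries to take the lock itself
         * instead of having it granted by whoever released it. */
        static bool try_steal_write_lock(void)
        {
                int expected = 0;

                return atomic_compare_exchange_strong(&activity, &expected, -1);
        }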