Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core locking changes from Ingo Molnar:
"The biggest change is the rwsem lock-steal improvements, both to the
assembly optimized and the spinlock based variants.

The other notable change is the clean up of the seqlock implementation
to be based on the seqcount infrastructure.

The rest is assorted smaller debuggability, cleanup and continued -rt
locking changes."

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
rwsem-spinlock: Implement writer lock-stealing for better scalability
futex: Revert "futex: Mark get_robust_list as deprecated"
generic: Use raw local irq variant for generic cmpxchg
lockdep: Selftest: convert spinlock to raw spinlock
seqlock: Use seqcount infrastructure
seqlock: Remove unused functions
ntp: Make ntp_lock raw
intel_idle: Convert i7300_idle_lock to raw_spinlock
locking: Various static lock initializer fixes
lockdep: Print more info when MAX_LOCK_DEPTH is exceeded
rwsem: Implement writer lock-stealing for better scalability
lockdep: Silence warning if CONFIG_LOCKDEP isn't set
watchdog: Use local_clock for get_timestamp()
lockdep: Rename print_unlock_inbalance_bug() to print_unlock_imbalance_bug()
locking/stat: Fix a typo
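For reference, the seqlock API whose implementation the seqcount rework below reshuffles keeps its usual read-retry usage pattern. A minimal sketch (the lock and data names are illustrative, not taken from this merge):

        static DEFINE_SEQLOCK(my_lock);
        static u64 my_data;

        /* Readers loop until they observe an even, unchanged sequence count. */
        static u64 read_my_data(void)
        {
                u64 val;
                unsigned seq;

                do {
                        seq = read_seqbegin(&my_lock);
                        val = my_data;
                } while (read_seqretry(&my_lock, seq));

                return val;
        }

        /* Writers serialize against each other via the lock embedded in seqlock_t. */
        static void write_my_data(u64 val)
        {
                write_seqlock(&my_lock);
                my_data = val;
                write_sequnlock(&my_lock);
        }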

+222 -238
+1 -1
Documentation/lockstat.txt
···
 
 - CONFIGURATION
 
-Lock statistics are enabled via CONFIG_LOCK_STATS.
+Lock statistics are enabled via CONFIG_LOCK_STAT.
 
 - USAGE
 
+3 -3
drivers/char/random.c
···
         .poolinfo = &poolinfo_table[0],
         .name = "input",
         .limit = 1,
-        .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
         .pool = input_pool_data
 };
 
···
         .name = "blocking",
         .limit = 1,
         .pull = &input_pool,
-        .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
         .pool = blocking_pool_data
 };
 
···
         .poolinfo = &poolinfo_table[1],
         .name = "nonblocking",
         .pull = &input_pool,
-        .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
+        .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
         .pool = nonblocking_pool_data
 };
 
+4 -4
drivers/idle/i7300_idle.c
···
 
 static struct pci_dev *fbd_dev;
 
-static spinlock_t i7300_idle_lock;
+static raw_spinlock_t i7300_idle_lock;
 static int i7300_idle_active;
 
 static u8 i7300_idle_thrtctl_saved;
···
                 idle_begin_time = ktime_get();
         }
 
-        spin_lock_irqsave(&i7300_idle_lock, flags);
+        raw_spin_lock_irqsave(&i7300_idle_lock, flags);
         if (val == IDLE_START) {
 
                 cpumask_set_cpu(smp_processor_id(), idle_cpumask);
···
                 }
         }
 end:
-        spin_unlock_irqrestore(&i7300_idle_lock, flags);
+        raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
         return 0;
 }
 
···
 
 static int __init i7300_idle_init(void)
 {
-        spin_lock_init(&i7300_idle_lock);
+        raw_spin_lock_init(&i7300_idle_lock);
         total_us = 0;
 
         if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
+1 -1
drivers/usb/chipidea/debug.c
···
 } dbg_data = {
         .idx = 0,
         .tty = 0,
-        .lck = __RW_LOCK_UNLOCKED(lck)
+        .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
 };
 
 /**
+1 -1
fs/file.c
···
                 .close_on_exec  = init_files.close_on_exec_init,
                 .open_fds       = init_files.open_fds_init,
         },
-        .file_lock      = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+        .file_lock      = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
 /*
+4 -4
include/asm-generic/cmpxchg-local.h
···
         if (size == 8 && sizeof(unsigned long) != 8)
                 wrong_size_cmpxchg(ptr);
 
-        local_irq_save(flags);
+        raw_local_irq_save(flags);
         switch (size) {
         case 1: prev = *(u8 *)ptr;
                 if (prev == old)
···
         default:
                 wrong_size_cmpxchg(ptr);
         }
-        local_irq_restore(flags);
+        raw_local_irq_restore(flags);
         return prev;
 }
 
···
         u64 prev;
         unsigned long flags;
 
-        local_irq_save(flags);
+        raw_local_irq_save(flags);
         prev = *(u64 *)ptr;
         if (prev == old)
                 *(u64 *)ptr = new;
-        local_irq_restore(flags);
+        raw_local_irq_restore(flags);
         return prev;
 }
 
+1 -1
include/linux/idr.h
···
         struct ida_bitmap       *free_bitmap;
 };
 
-#define IDA_INIT(name)          { .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define IDA_INIT(name)          { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
 #define DEFINE_IDA(name)        struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
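The random.c, chipidea/debug.c, fs/file.c and idr.h hunks above are the "Various static lock initializer fixes": the name handed to initializers such as __SPIN_LOCK_UNLOCKED() or IDR_INIT() is only stringified into the lock's lockdep/debug name, so it should be the lock variable being initialized, not a pointer to it or an unrelated symbol. A minimal sketch of the corrected pattern ("my_pool" is illustrative, not from this merge):

        static struct {
                spinlock_t lock;
                int count;
        } my_pool = {
                /* pass the lock's name, not &my_pool.lock */
                .lock = __SPIN_LOCK_UNLOCKED(my_pool.lock),
                .count = 0,
        };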
+1 -1
include/linux/lockdep.h
···
 
 #define lockdep_depth(tsk)      (0)
 
-#define lockdep_assert_held(l)                  do { } while (0)
+#define lockdep_assert_held(l)                  do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)                  (0)
 
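With CONFIG_LOCKDEP disabled, the old empty definition made any variable that exists only to be passed to lockdep_assert_held() look unused to the compiler; evaluating (void)(l) silences that warning without generating any code. A hedged illustration (the struct and function are made up for this example):

        struct my_ctx {
                spinlock_t lock;
        };

        static void my_check(struct my_ctx *ctx)
        {
                spinlock_t *l = &ctx->lock;     /* only used by the assertion */

                lockdep_assert_held(l);         /* no "unused variable" warning with CONFIG_LOCKDEP=n */
        }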
+92 -103
include/linux/seqlock.h
···
 #include <linux/preempt.h>
 #include <asm/processor.h>
 
-typedef struct {
-        unsigned sequence;
-        spinlock_t lock;
-} seqlock_t;
-
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
-#define __SEQLOCK_UNLOCKED(lockname) \
-                { 0, __SPIN_LOCK_UNLOCKED(lockname) }
-
-#define seqlock_init(x)                                 \
-        do {                                            \
-                (x)->sequence = 0;                      \
-                spin_lock_init(&(x)->lock);             \
-        } while (0)
-
-#define DEFINE_SEQLOCK(x) \
-                seqlock_t x = __SEQLOCK_UNLOCKED(x)
-
-/* Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
- */
-static inline void write_seqlock(seqlock_t *sl)
-{
-        spin_lock(&sl->lock);
-        ++sl->sequence;
-        smp_wmb();
-}
-
-static inline void write_sequnlock(seqlock_t *sl)
-{
-        smp_wmb();
-        sl->sequence++;
-        spin_unlock(&sl->lock);
-}
-
-static inline int write_tryseqlock(seqlock_t *sl)
-{
-        int ret = spin_trylock(&sl->lock);
-
-        if (ret) {
-                ++sl->sequence;
-                smp_wmb();
-        }
-        return ret;
-}
-
-/* Start of read calculation -- fetch last complete writer token */
-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
-{
-        unsigned ret;
-
-repeat:
-        ret = ACCESS_ONCE(sl->sequence);
-        if (unlikely(ret & 1)) {
-                cpu_relax();
-                goto repeat;
-        }
-        smp_rmb();
-
-        return ret;
-}
-
-/*
- * Test if reader processed invalid data.
- *
- * If sequence value changed then writer changed data while in section.
- */
-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
-{
-        smp_rmb();
-
-        return unlikely(sl->sequence != start);
-}
-
-
 /*
  * Version using sequence counter only.
  * This can be used when code has its own mutex protecting the
  * updating starting before the write_seqcountbeqin() and ending
  * after the write_seqcount_end().
  */
-
 typedef struct seqcount {
         unsigned sequence;
 } seqcount_t;
···
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
         smp_rmb();
-
         return __read_seqcount_retry(s, start);
 }
 
···
         s->sequence+=2;
 }
 
+typedef struct {
+        struct seqcount seqcount;
+        spinlock_t lock;
+} seqlock_t;
+
 /*
- * Possible sw/hw IRQ protected versions of the interfaces.
+ * These macros triggered gcc-3.x compile-time problems.  We think these are
+ * OK now.  Be cautious.
  */
+#define __SEQLOCK_UNLOCKED(lockname)                    \
+        {                                               \
+                .seqcount = SEQCNT_ZERO,                \
+                .lock = __SPIN_LOCK_UNLOCKED(lockname)  \
+        }
+
+#define seqlock_init(x)                                 \
+        do {                                            \
+                seqcount_init(&(x)->seqcount);          \
+                spin_lock_init(&(x)->lock);             \
+        } while (0)
+
+#define DEFINE_SEQLOCK(x) \
+                seqlock_t x = __SEQLOCK_UNLOCKED(x)
+
+/*
+ * Read side functions for starting and finalizing a read side section.
+ */
+static inline unsigned read_seqbegin(const seqlock_t *sl)
+{
+        return read_seqcount_begin(&sl->seqcount);
+}
+
+static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
+{
+        return read_seqcount_retry(&sl->seqcount, start);
+}
+
+/*
+ * Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
+ */
+static inline void write_seqlock(seqlock_t *sl)
+{
+        spin_lock(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock(&sl->lock);
+}
+
+static inline void write_seqlock_bh(seqlock_t *sl)
+{
+        spin_lock_bh(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_bh(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_bh(&sl->lock);
+}
+
+static inline void write_seqlock_irq(seqlock_t *sl)
+{
+        spin_lock_irq(&sl->lock);
+        write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_irq(seqlock_t *sl)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&sl->lock, flags);
+        write_seqcount_begin(&sl->seqcount);
+        return flags;
+}
+
 #define write_seqlock_irqsave(lock, flags)                              \
-        do { local_irq_save(flags); write_seqlock(lock); } while (0)
-#define write_seqlock_irq(lock)                                         \
-        do { local_irq_disable();   write_seqlock(lock); } while (0)
-#define write_seqlock_bh(lock)                                          \
-        do { local_bh_disable();    write_seqlock(lock); } while (0)
+        do { flags = __write_seqlock_irqsave(lock); } while (0)
 
-#define write_sequnlock_irqrestore(lock, flags)                         \
-        do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
-#define write_sequnlock_irq(lock)                                       \
-        do { write_sequnlock(lock); local_irq_enable(); } while(0)
-#define write_sequnlock_bh(lock)                                        \
-        do { write_sequnlock(lock); local_bh_enable(); } while(0)
-
-#define read_seqbegin_irqsave(lock, flags)                              \
-        ({ local_irq_save(flags);   read_seqbegin(lock); })
-
-#define read_seqretry_irqrestore(lock, iv, flags)                       \
-        ({                                                              \
-                int ret = read_seqretry(lock, iv);                      \
-                local_irq_restore(flags);                               \
-                ret;                                                    \
-        })
+static inline void
+write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+        write_seqcount_end(&sl->seqcount);
+        spin_unlock_irqrestore(&sl->lock, flags);
+}
 
 #endif /* __LINUX_SEQLOCK_H */
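With the conversion above, write_seqlock_irqsave() stays a macro only so it can assign the flags returned by the new __write_seqlock_irqsave() helper; callers are unchanged. A short usage sketch reusing the illustrative names from the earlier example:

        unsigned long flags;

        write_seqlock_irqsave(&my_lock, flags);
        my_data = val;
        write_sequnlock_irqrestore(&my_lock, flags);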
-2
kernel/futex.c
···
         if (!futex_cmpxchg_enabled)
                 return -ENOSYS;
 
-        WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
         rcu_read_lock();
 
         ret = -ESRCH;
-2
kernel/futex_compat.c
···
         if (!futex_cmpxchg_enabled)
                 return -ENOSYS;
 
-        WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
         rcu_read_lock();
 
         ret = -ESRCH;
+10 -5
kernel/lockdep.c
···
 #endif
         if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
                 debug_locks_off();
-                printk("BUG: MAX_LOCK_DEPTH too low!\n");
+                printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
+                       curr->lockdep_depth, MAX_LOCK_DEPTH);
                 printk("turning off the locking correctness validator.\n");
+
+                lockdep_print_held_locks(current);
+                debug_show_all_locks();
                 dump_stack();
+
                 return 0;
         }
···
 }
 
 static int
-print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
                            unsigned long ip)
 {
         if (!debug_locks_off())
···
                 return 0;
 
         if (curr->lockdep_depth <= 0)
-                return print_unlock_inbalance_bug(curr, lock, ip);
+                return print_unlock_imbalance_bug(curr, lock, ip);
 
         return 1;
 }
···
                         goto found_it;
                 prev_hlock = hlock;
         }
-        return print_unlock_inbalance_bug(curr, lock, ip);
+        return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
         lockdep_init_map(lock, name, key, 0);
···
                         goto found_it;
                 prev_hlock = hlock;
         }
-        return print_unlock_inbalance_bug(curr, lock, ip);
+        return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
         if (hlock->instance == lock)
+13 -13
kernel/time/ntp.c
···
  * NTP timekeeping variables:
  */
 
-DEFINE_SPINLOCK(ntp_lock);
+DEFINE_RAW_SPINLOCK(ntp_lock);
 
 
 /* USER_HZ period (usecs): */
···
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         time_adjust = 0;                /* stop active adjtime() */
         time_status |= STA_UNSYNC;
···
 
         /* Clear PPS state variables */
         pps_clear();
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
 }
···
         unsigned long flags;
         s64 ret;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
         ret = tick_length;
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
         return ret;
 }
 
···
         int leap = 0;
         unsigned long flags;
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         /*
          * Leap second processing. If in leap-insert state at the end of the
···
                 time_adjust = 0;
 
 out:
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
         return leap;
 }
···
 
         getnstimeofday(&ts);
 
-        spin_lock_irq(&ntp_lock);
+        raw_spin_lock_irq(&ntp_lock);
 
         if (txc->modes & ADJ_ADJTIME) {
                 long save_adjust = time_adjust;
···
         /* fill PPS status fields */
         pps_fill_timex(txc);
 
-        spin_unlock_irq(&ntp_lock);
+        raw_spin_unlock_irq(&ntp_lock);
 
         txc->time.tv_sec = ts.tv_sec;
         txc->time.tv_usec = ts.tv_nsec;
···
 
         pts_norm = pps_normalize_ts(*phase_ts);
 
-        spin_lock_irqsave(&ntp_lock, flags);
+        raw_spin_lock_irqsave(&ntp_lock, flags);
 
         /* clear the error bits, they will be set again if needed */
         time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
···
          * just start the frequency interval */
         if (unlikely(pps_fbase.tv_sec == 0)) {
                 pps_fbase = *raw_ts;
-                spin_unlock_irqrestore(&ntp_lock, flags);
+                raw_spin_unlock_irqrestore(&ntp_lock, flags);
                 return;
         }
 
···
                 time_status |= STA_PPSJITTER;
                 /* restart the frequency calibration interval */
                 pps_fbase = *raw_ts;
-                spin_unlock_irqrestore(&ntp_lock, flags);
+                raw_spin_unlock_irqrestore(&ntp_lock, flags);
                 pr_err("hardpps: PPSJITTER: bad pulse\n");
                 return;
         }
···
 
         hardpps_update_phase(pts_norm.nsec);
 
-        spin_unlock_irqrestore(&ntp_lock, flags);
+        raw_spin_unlock_irqrestore(&ntp_lock, flags);
 }
 EXPORT_SYMBOL(hardpps);
 
+4 -6
kernel/watchdog.c
···
  * resolution, and we don't need to waste time with a big divide when
  * 2^30ns == 1.074s.
  */
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
 {
-        return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+        return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
···
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-        int this_cpu = smp_processor_id();
-
-        __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
+        __this_cpu_write(watchdog_touch_ts, get_timestamp());
 }
 
 void touch_softlockup_watchdog(void)
···
 
 static int is_softlockup(unsigned long touch_ts)
 {
-        unsigned long now = get_timestamp(smp_processor_id());
+        unsigned long now = get_timestamp();
 
         /* Warn about unreasonable delays: */
         if (time_after(now, touch_ts + get_softlockup_thresh()))
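The shift in get_timestamp() is a cheap stand-in for dividing nanoseconds by 10^9: 2^30 ns is about 1.074 s, so the result is "roughly seconds", which is all the watchdog needs. A quick worked example with made-up values:

        u64 ns = 5000000000ULL;         /* 5 seconds worth of nanoseconds */
        unsigned long ts = ns >> 30;    /* 5000000000 / 2^30 = 4: coarse, but monotonic and cheap */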
+17 -17
lib/locking-selftest.c
···
  * Normal standalone locks, for the circular and irq-context
  * dependency tests:
  */
-static DEFINE_SPINLOCK(lock_A);
-static DEFINE_SPINLOCK(lock_B);
-static DEFINE_SPINLOCK(lock_C);
-static DEFINE_SPINLOCK(lock_D);
+static DEFINE_RAW_SPINLOCK(lock_A);
+static DEFINE_RAW_SPINLOCK(lock_B);
+static DEFINE_RAW_SPINLOCK(lock_C);
+static DEFINE_RAW_SPINLOCK(lock_D);
 
 static DEFINE_RWLOCK(rwlock_A);
 static DEFINE_RWLOCK(rwlock_B);
···
  * but X* and Y* are different classes. We do this so that
  * we do not trigger a real lockup:
  */
-static DEFINE_SPINLOCK(lock_X1);
-static DEFINE_SPINLOCK(lock_X2);
-static DEFINE_SPINLOCK(lock_Y1);
-static DEFINE_SPINLOCK(lock_Y2);
-static DEFINE_SPINLOCK(lock_Z1);
-static DEFINE_SPINLOCK(lock_Z2);
+static DEFINE_RAW_SPINLOCK(lock_X1);
+static DEFINE_RAW_SPINLOCK(lock_X2);
+static DEFINE_RAW_SPINLOCK(lock_Y1);
+static DEFINE_RAW_SPINLOCK(lock_Y2);
+static DEFINE_RAW_SPINLOCK(lock_Z1);
+static DEFINE_RAW_SPINLOCK(lock_Z2);
 
 static DEFINE_RWLOCK(rwlock_X1);
 static DEFINE_RWLOCK(rwlock_X2);
···
  */
 #define INIT_CLASS_FUNC(class)                          \
 static noinline void                                    \
-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
-                 struct rw_semaphore *rwsem)            \
+init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
+        struct mutex *mutex, struct rw_semaphore *rwsem)\
 {                                                       \
-        spin_lock_init(lock);                           \
+        raw_spin_lock_init(lock);                       \
         rwlock_init(rwlock);                            \
         mutex_init(mutex);                              \
         init_rwsem(rwsem);                              \
···
  * Shortcuts for lock/unlock API variants, to keep
  * the testcases compact:
  */
-#define L(x)                    spin_lock(&lock_##x)
-#define U(x)                    spin_unlock(&lock_##x)
+#define L(x)                    raw_spin_lock(&lock_##x)
+#define U(x)                    raw_spin_unlock(&lock_##x)
 #define LU(x)                   L(x); U(x)
-#define SI(x)                   spin_lock_init(&lock_##x)
+#define SI(x)                   raw_spin_lock_init(&lock_##x)
 
 #define WL(x)                   write_lock(&rwlock_##x)
 #define WU(x)                   write_unlock(&rwlock_##x)
···
 
 #define I2(x)                                   \
         do {                                    \
-                spin_lock_init(&lock_##x);      \
+                raw_spin_lock_init(&lock_##x);  \
                 rwlock_init(&rwlock_##x);       \
                 mutex_init(&mutex_##x);         \
                 init_rwsem(&rwsem_##x);         \
+24 -45
lib/rwsem-spinlock.c
···
                 goto dont_wake_writers;
         }
 
-        /* if we are allowed to wake writers try to grant a single write lock
-         * if there's a writer at the front of the queue
-         * - we leave the 'waiting count' incremented to signify potential
-         *   contention
+        /*
+         * as we support write lock stealing, we can't set sem->activity
+         * to -1 here to indicate we get the lock. Instead, we wake it up
+         * to let it go get it again.
          */
         if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
-                sem->activity = -1;
-                list_del(&waiter->list);
-                tsk = waiter->task;
-                /* Don't touch waiter after ->task has been NULLed */
-                smp_mb();
-                waiter->task = NULL;
-                wake_up_process(tsk);
-                put_task_struct(tsk);
+                wake_up_process(waiter->task);
                 goto out;
         }
···
 __rwsem_wake_one_writer(struct rw_semaphore *sem)
 {
         struct rwsem_waiter *waiter;
-        struct task_struct *tsk;
-
-        sem->activity = -1;
 
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-        list_del(&waiter->list);
+        wake_up_process(waiter->task);
 
-        tsk = waiter->task;
-        smp_mb();
-        waiter->task = NULL;
-        wake_up_process(tsk);
-        put_task_struct(tsk);
         return sem;
 }
···
 
 /*
  * get a write lock on the semaphore
- * - we increment the waiting count anyway to indicate an exclusive lock
  */
 void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
···
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-                /* granted */
-                sem->activity = -1;
-                raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-                goto out;
-        }
-
-        tsk = current;
-        set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
         /* set up my own style of waitqueue */
+        tsk = current;
         waiter.task = tsk;
         waiter.flags = RWSEM_WAITING_FOR_WRITE;
-        get_task_struct(tsk);
-
         list_add_tail(&waiter.list, &sem->wait_list);
 
-        /* we don't need to touch the semaphore struct anymore */
-        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-        /* wait to be given the lock */
+        /* wait for someone to release the lock */
         for (;;) {
-                if (!waiter.task)
+                /*
+                 * That is the key to support write lock stealing: allows the
+                 * task already on CPU to get the lock soon rather than put
+                 * itself into sleep and waiting for system woke it or someone
+                 * else in the head of the wait list up.
+                 */
+                if (sem->activity == 0)
                         break;
-                schedule();
                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+                raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+                schedule();
+                raw_spin_lock_irqsave(&sem->wait_lock, flags);
         }
+        /* got the lock */
+        sem->activity = -1;
+        list_del(&waiter.list);
 
-        tsk->state = TASK_RUNNING;
- out:
-        ;
+        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
···
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-                /* granted */
+        if (sem->activity == 0) {
+                /* got the lock */
                 sem->activity = -1;
                 ret = 1;
         }
+46 -29
lib/rwsem.c
···
  *
  * Written by David Howells (dhowells@redhat.com).
  * Derived from arch/i386/kernel/semaphore.c
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
···
         struct rwsem_waiter *waiter;
         struct task_struct *tsk;
         struct list_head *next;
-        signed long oldcount, woken, loop, adjustment;
+        signed long woken, loop, adjustment;
 
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
···
          */
                 goto out;
 
-        /* There's a writer at the front of the queue - try to grant it the
-         * write lock. However, we only wake this writer if we can transition
-         * the active part of the count from 0 -> 1
-         */
-        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
-        if (waiter->list.next == &sem->wait_list)
-                adjustment -= RWSEM_WAITING_BIAS;
-
- try_again_write:
-        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
-        if (oldcount & RWSEM_ACTIVE_MASK)
-                /* Someone grabbed the sem already */
-                goto undo_write;
-
-        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
-         * It is an allocated on the waiter's stack and may become invalid at
-         * any time after that point (due to a wakeup from another source).
-         */
-        list_del(&waiter->list);
-        tsk = waiter->task;
-        smp_mb();
-        waiter->task = NULL;
-        wake_up_process(tsk);
-        put_task_struct(tsk);
+        /* Wake up the writing waiter and let the task grab the sem: */
+        wake_up_process(waiter->task);
         goto out;
 
  readers_only:
···
 
  out:
         return sem;
+}
 
-        /* undo the change to the active count, but check for a transition
-         * 1->0 */
- undo_write:
+/* Try to get write sem, caller holds sem->wait_lock: */
+static int try_get_writer_sem(struct rw_semaphore *sem,
+                                        struct rwsem_waiter *waiter)
+{
+        struct rwsem_waiter *fwaiter;
+        long oldcount, adjustment;
+
+        /* only steal when first waiter is writing */
+        fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+        if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
+                return 0;
+
+        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+        /* Only one waiter in the queue: */
+        if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
+                adjustment -= RWSEM_WAITING_BIAS;
+
+try_again_write:
+        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+        if (!(oldcount & RWSEM_ACTIVE_MASK)) {
+                /* No active lock: */
+                struct task_struct *tsk = waiter->task;
+
+                list_del(&waiter->list);
+                smp_mb();
+                put_task_struct(tsk);
+                tsk->state = TASK_RUNNING;
+                return 1;
+        }
+        /* some one grabbed the sem already */
         if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
-                goto out;
+                return 0;
         goto try_again_write;
 }
 
···
         for (;;) {
                 if (!waiter.task)
                         break;
+
+                raw_spin_lock_irq(&sem->wait_lock);
+                /* Try to get the writer sem, may steal from the head writer: */
+                if (flags == RWSEM_WAITING_FOR_WRITE)
+                        if (try_get_writer_sem(sem, &waiter)) {
+                                raw_spin_unlock_irq(&sem->wait_lock);
+                                return sem;
+                        }
+                raw_spin_unlock_irq(&sem->wait_lock);
                 schedule();
                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
         }
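The core idea of the lock-stealing changes above is that the releasing task no longer hands the semaphore to the writer at the head of the queue; the woken writer (or a writer that is already running) competes for it again itself. A simplified user-space model of that "wake, don't hand off" step, using C11 atomics rather than the kernel's rwsem internals:

        #include <stdatomic.h>
        #include <stdbool.h>

        /* 0: free, -1: write-locked -- mirrors sem->activity in rwsem-spinlock.c */
        static atomic_int activity;

        /* A woken (or newly arriving) writer tries to take the lock itself
         * instead of having it granted by whoever released it. */
        static bool try_steal_write_lock(void)
        {
                int expected = 0;

                return atomic_compare_exchange_strong(&activity, &expected, -1);
        }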