Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking changes from Ingo Molnar:
"Various updates:

- Futex scalability improvements: remove the page lock from
get_futex_key() for shared futexes, which speeds up 'perf bench futex hash'
benchmarks by over 40% on a 60-core Westmere. This makes anon-mem
shared futexes perform close to private futexes. (Mel Gorman)

- lockdep hash collision detection and fix (Alfredo Alvarez
Fernandez)

- lockdep testing enhancements (Alfredo Alvarez Fernandez)

- robustify lockdep init by using hlists (Andrew Morton, Andrey
Ryabinin)

- mutex and csd_lock micro-optimizations (Davidlohr Bueso)

- small x86 barrier tweaks (Michael S Tsirkin)

- qspinlock updates (Waiman Long)"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
locking/csd_lock: Use smp_cond_acquire() in csd_lock_wait()
locking/csd_lock: Explicitly inline csd_lock*() helpers
futex: Replace barrier() in unqueue_me() with READ_ONCE()
locking/lockdep: Detect chain_key collisions
locking/lockdep: Prevent chain_key collisions
tools/lib/lockdep: Fix link creation warning
tools/lib/lockdep: Add tests for AA and ABBA locking
tools/lib/lockdep: Add userspace version of READ_ONCE()
tools/lib/lockdep: Fix the build on recent kernels
locking/qspinlock: Move __ARCH_SPIN_LOCK_UNLOCKED to qspinlock_types.h
locking/mutex: Allow next waiter lockless wakeup
locking/pvqspinlock: Enable slowpath locking count tracking
locking/qspinlock: Use smp_cond_acquire() in pending code
locking/pvqspinlock: Move lock stealing count tracking code into pv_queued_spin_steal_lock()
locking/mcs: Fix mcs_spin_lock() ordering
futex: Remove requirement for lock_page() in get_futex_key()
futex: Rename barrier references in ordering guarantees
locking/atomics: Update comment about READ_ONCE() and structures
locking/lockdep: Eliminate lockdep_init()
locking/lockdep: Convert hash tables to hlists
...

+334 -214
-2
arch/c6x/kernel/setup.c
··· 281 281 */ 282 282 set_ist(_vectors_start); 283 283 284 - lockdep_init(); 285 - 286 284 /* 287 285 * dtb is passed in from bootloader. 288 286 * fdt is linked in blob.
-2
arch/microblaze/kernel/setup.c
··· 130 130 memset(__bss_start, 0, __bss_stop-__bss_start); 131 131 memset(_ssbss, 0, _esbss-_ssbss); 132 132 133 - lockdep_init(); 134 - 135 133 /* initialize device tree for usage in early_printk */ 136 134 early_init_devtree(_fdt_start); 137 135
-2
arch/powerpc/kernel/setup_32.c
··· 114 114 115 115 notrace void __init machine_init(u64 dt_ptr) 116 116 { 117 - lockdep_init(); 118 - 119 117 /* Enable early debugging if any specified (see udbg.h) */ 120 118 udbg_early_init(); 121 119
-3
arch/powerpc/kernel/setup_64.c
··· 255 255 setup_paca(&boot_paca); 256 256 fixup_boot_paca(); 257 257 258 - /* Initialize lockdep early or else spinlocks will blow */ 259 - lockdep_init(); 260 - 261 258 /* -------- printk is now safe to use ------- */ 262 259 263 260 /* Enable early debugging if any specified (see udbg.h) */
-1
arch/s390/kernel/early.c
··· 448 448 rescue_initrd(); 449 449 clear_bss_section(); 450 450 init_kernel_storage_key(); 451 - lockdep_init(); 452 451 lockdep_off(); 453 452 setup_lowcore_early(); 454 453 setup_facility_list();
-8
arch/sparc/kernel/head_64.S
··· 696 696 call __bzero 697 697 sub %o1, %o0, %o1 698 698 699 - #ifdef CONFIG_LOCKDEP 700 - /* We have this call this super early, as even prom_init can grab 701 - * spinlocks and thus call into the lockdep code. 702 - */ 703 - call lockdep_init 704 - nop 705 - #endif 706 - 707 699 call prom_init 708 700 mov %l7, %o0 ! OpenPROM cif handler 709 701
+7 -8
arch/x86/include/asm/barrier.h
··· 6 6 7 7 /* 8 8 * Force strict CPU ordering. 9 - * And yes, this is required on UP too when we're talking 9 + * And yes, this might be required on UP too when we're talking 10 10 * to devices. 11 11 */ 12 12 13 13 #ifdef CONFIG_X86_32 14 - /* 15 - * Some non-Intel clones support out of order store. wmb() ceases to be a 16 - * nop for these. 17 - */ 18 - #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) 19 - #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) 20 - #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) 14 + #define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \ 15 + X86_FEATURE_XMM2) ::: "memory", "cc") 16 + #define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \ 17 + X86_FEATURE_XMM2) ::: "memory", "cc") 18 + #define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \ 19 + X86_FEATURE_XMM2) ::: "memory", "cc") 21 20 #else 22 21 #define mb() asm volatile("mfence":::"memory") 23 22 #define rmb() asm volatile("lfence":::"memory")
+2 -2
arch/x86/kernel/process.c
··· 418 418 if (!current_set_polling_and_test()) { 419 419 trace_cpu_idle_rcuidle(1, smp_processor_id()); 420 420 if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { 421 - smp_mb(); /* quirk */ 421 + mb(); /* quirk */ 422 422 clflush((void *)&current_thread_info()->flags); 423 - smp_mb(); /* quirk */ 423 + mb(); /* quirk */ 424 424 } 425 425 426 426 __monitor((void *)&current_thread_info()->flags, 0, 0);
-6
arch/x86/lguest/boot.c
··· 1520 1520 */ 1521 1521 reserve_top_address(lguest_data.reserve_mem); 1522 1522 1523 - /* 1524 - * If we don't initialize the lock dependency checker now, it crashes 1525 - * atomic_notifier_chain_register, then paravirt_disable_iospace. 1526 - */ 1527 - lockdep_init(); 1528 - 1529 1523 /* Hook in our special panic hypercall code. */ 1530 1524 atomic_notifier_chain_register(&panic_notifier_list, &paniced); 1531 1525
-5
include/asm-generic/qspinlock.h
··· 120 120 #endif 121 121 122 122 /* 123 - * Initializier 124 - */ 125 - #define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } 126 - 127 - /* 128 123 * Remapping spinlock architecture specific functions to the corresponding 129 124 * queued spinlock functions. 130 125 */
+5
include/asm-generic/qspinlock_types.h
··· 33 33 } arch_spinlock_t; 34 34 35 35 /* 36 + * Initializier 37 + */ 38 + #define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } 39 + 40 + /* 36 41 * Bitfields in the atomic value: 37 42 * 38 43 * When NR_CPUS < 16K
+3 -2
include/linux/compiler.h
··· 263 263 * In contrast to ACCESS_ONCE these two macros will also work on aggregate 264 264 * data types like structs or unions. If the size of the accessed data 265 265 * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) 266 - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a 267 - * compile-time warning. 266 + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at 267 + * least two memcpy()s: one for the __builtin_memcpy() and then one for 268 + * the macro doing the copy of variable - '__u' allocated on the stack. 268 269 * 269 270 * Their two major use cases are: (1) Mediating communication between 270 271 * process-level code and irq/NMI handlers, all running on the same CPU,
-2
include/linux/lockdep.h
··· 261 261 /* 262 262 * Initialization, self-test and debugging-output methods: 263 263 */ 264 - extern void lockdep_init(void); 265 264 extern void lockdep_info(void); 266 265 extern void lockdep_reset(void); 267 266 extern void lockdep_reset_lock(struct lockdep_map *lock); ··· 391 392 # define lockdep_set_current_reclaim_state(g) do { } while (0) 392 393 # define lockdep_clear_current_reclaim_state() do { } while (0) 393 394 # define lockdep_trace_alloc(g) do { } while (0) 394 - # define lockdep_init() do { } while (0) 395 395 # define lockdep_info() do { } while (0) 396 396 # define lockdep_init_map(lock, name, key, sub) \ 397 397 do { (void)(name); (void)(key); } while (0)
-5
init/main.c
··· 499 499 char *command_line; 500 500 char *after_dashes; 501 501 502 - /* 503 - * Need to run as early as possible, to initialize the 504 - * lockdep hash: 505 - */ 506 - lockdep_init(); 507 502 set_task_stack_end_magic(&init_task); 508 503 smp_setup_processor_id(); 509 504 debug_objects_early_init();
+113 -26
kernel/futex.c
··· 124 124 * futex_wait(futex, val); 125 125 * 126 126 * waiters++; (a) 127 - * mb(); (A) <-- paired with -. 128 - * | 129 - * lock(hash_bucket(futex)); | 130 - * | 131 - * uval = *futex; | 132 - * | *futex = newval; 133 - * | sys_futex(WAKE, futex); 134 - * | futex_wake(futex); 135 - * | 136 - * `-------> mb(); (B) 127 + * smp_mb(); (A) <-- paired with -. 128 + * | 129 + * lock(hash_bucket(futex)); | 130 + * | 131 + * uval = *futex; | 132 + * | *futex = newval; 133 + * | sys_futex(WAKE, futex); 134 + * | futex_wake(futex); 135 + * | 136 + * `--------> smp_mb(); (B) 137 137 * if (uval == val) 138 138 * queue(); 139 139 * unlock(hash_bucket(futex)); ··· 334 334 /* 335 335 * Ensure futex_get_mm() implies a full barrier such that 336 336 * get_futex_key() implies a full barrier. This is relied upon 337 - * as full barrier (B), see the ordering comment above. 337 + * as smp_mb(); (B), see the ordering comment above. 338 338 */ 339 339 smp_mb__after_atomic(); 340 340 } ··· 407 407 408 408 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 409 409 case FUT_OFF_INODE: 410 - ihold(key->shared.inode); /* implies MB (B) */ 410 + ihold(key->shared.inode); /* implies smp_mb(); (B) */ 411 411 break; 412 412 case FUT_OFF_MMSHARED: 413 - futex_get_mm(key); /* implies MB (B) */ 413 + futex_get_mm(key); /* implies smp_mb(); (B) */ 414 414 break; 415 415 default: 416 416 /* ··· 418 418 * mm, therefore the only purpose of calling get_futex_key_refs 419 419 * is because we need the barrier for the lockless waiter check. 
420 420 */ 421 - smp_mb(); /* explicit MB (B) */ 421 + smp_mb(); /* explicit smp_mb(); (B) */ 422 422 } 423 423 } 424 424 ··· 497 497 if (!fshared) { 498 498 key->private.mm = mm; 499 499 key->private.address = address; 500 - get_futex_key_refs(key); /* implies MB (B) */ 500 + get_futex_key_refs(key); /* implies smp_mb(); (B) */ 501 501 return 0; 502 502 } 503 503 ··· 520 520 else 521 521 err = 0; 522 522 523 - lock_page(page); 523 + /* 524 + * The treatment of mapping from this point on is critical. The page 525 + * lock protects many things but in this context the page lock 526 + * stabilizes mapping, prevents inode freeing in the shared 527 + * file-backed region case and guards against movement to swap cache. 528 + * 529 + * Strictly speaking the page lock is not needed in all cases being 530 + * considered here and page lock forces unnecessarily serialization 531 + * From this point on, mapping will be re-verified if necessary and 532 + * page lock will be acquired only if it is unavoidable 533 + */ 534 + page = compound_head(page); 535 + mapping = READ_ONCE(page->mapping); 536 + 524 537 /* 525 538 * If page->mapping is NULL, then it cannot be a PageAnon 526 539 * page; but it might be the ZERO_PAGE or in the gate area or ··· 549 536 * shmem_writepage move it from filecache to swapcache beneath us: 550 537 * an unlikely race, but we do need to retry for page->mapping. 551 538 */ 552 - mapping = compound_head(page)->mapping; 553 - if (!mapping) { 554 - int shmem_swizzled = PageSwapCache(page); 539 + if (unlikely(!mapping)) { 540 + int shmem_swizzled; 541 + 542 + /* 543 + * Page lock is required to identify which special case above 544 + * applies. If this is really a shmem page then the page lock 545 + * will prevent unexpected transitions. 
546 + */ 547 + lock_page(page); 548 + shmem_swizzled = PageSwapCache(page) || page->mapping; 555 549 unlock_page(page); 556 550 put_page(page); 551 + 557 552 if (shmem_swizzled) 558 553 goto again; 554 + 559 555 return -EFAULT; 560 556 } 561 557 562 558 /* 563 559 * Private mappings are handled in a simple way. 560 + * 561 + * If the futex key is stored on an anonymous page, then the associated 562 + * object is the mm which is implicitly pinned by the calling process. 564 563 * 565 564 * NOTE: When userspace waits on a MAP_SHARED mapping, even if 566 565 * it's a read-only handle, it's expected that futexes attach to ··· 591 566 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ 592 567 key->private.mm = mm; 593 568 key->private.address = address; 569 + 570 + get_futex_key_refs(key); /* implies smp_mb(); (B) */ 571 + 594 572 } else { 573 + struct inode *inode; 574 + 575 + /* 576 + * The associated futex object in this case is the inode and 577 + * the page->mapping must be traversed. Ordinarily this should 578 + * be stabilised under page lock but it's not strictly 579 + * necessary in this case as we just want to pin the inode, not 580 + * update the radix tree or anything like that. 581 + * 582 + * The RCU read lock is taken as the inode is finally freed 583 + * under RCU. If the mapping still matches expectations then the 584 + * mapping->host can be safely accessed as being a valid inode. 585 + */ 586 + rcu_read_lock(); 587 + 588 + if (READ_ONCE(page->mapping) != mapping) { 589 + rcu_read_unlock(); 590 + put_page(page); 591 + 592 + goto again; 593 + } 594 + 595 + inode = READ_ONCE(mapping->host); 596 + if (!inode) { 597 + rcu_read_unlock(); 598 + put_page(page); 599 + 600 + goto again; 601 + } 602 + 603 + /* 604 + * Take a reference unless it is about to be freed. Previously 605 + * this reference was taken by ihold under the page lock 606 + * pinning the inode in place so i_lock was unnecessary. 
The 607 + * only way for this check to fail is if the inode was 608 + * truncated in parallel so warn for now if this happens. 609 + * 610 + * We are not calling into get_futex_key_refs() in file-backed 611 + * cases, therefore a successful atomic_inc return below will 612 + * guarantee that get_futex_key() will still imply smp_mb(); (B). 613 + */ 614 + if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { 615 + rcu_read_unlock(); 616 + put_page(page); 617 + 618 + goto again; 619 + } 620 + 621 + /* Should be impossible but lets be paranoid for now */ 622 + if (WARN_ON_ONCE(inode->i_mapping != mapping)) { 623 + err = -EFAULT; 624 + rcu_read_unlock(); 625 + iput(inode); 626 + 627 + goto out; 628 + } 629 + 595 630 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ 596 - key->shared.inode = mapping->host; 631 + key->shared.inode = inode; 597 632 key->shared.pgoff = basepage_index(page); 633 + rcu_read_unlock(); 598 634 } 599 635 600 - get_futex_key_refs(key); /* implies MB (B) */ 601 - 602 636 out: 603 - unlock_page(page); 604 637 put_page(page); 605 638 return err; 606 639 } ··· 1947 1864 1948 1865 q->lock_ptr = &hb->lock; 1949 1866 1950 - spin_lock(&hb->lock); /* implies MB (A) */ 1867 + spin_lock(&hb->lock); /* implies smp_mb(); (A) */ 1951 1868 return hb; 1952 1869 } 1953 1870 ··· 2010 1927 2011 1928 /* In the common case we don't take the spinlock, which is nice. */ 2012 1929 retry: 2013 - lock_ptr = q->lock_ptr; 2014 - barrier(); 1930 + /* 1931 + * q->lock_ptr can change between this read and the following spin_lock. 1932 + * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and 1933 + * optimizing lock_ptr out of the logic below. 1934 + */ 1935 + lock_ptr = READ_ONCE(q->lock_ptr); 2015 1936 if (lock_ptr != NULL) { 2016 1937 spin_lock(lock_ptr); 2017 1938 /*
+57 -75
kernel/locking/lockdep.c
··· 123 123 return ret; 124 124 } 125 125 126 - static int lockdep_initialized; 127 - 128 126 unsigned long nr_list_entries; 129 127 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; 130 128 ··· 432 434 433 435 #ifdef CONFIG_DEBUG_LOCKDEP 434 436 /* 435 - * We cannot printk in early bootup code. Not even early_printk() 436 - * might work. So we mark any initialization errors and printk 437 - * about it later on, in lockdep_info(). 438 - */ 439 - static int lockdep_init_error; 440 - static const char *lock_init_error; 441 - static unsigned long lockdep_init_trace_data[20]; 442 - static struct stack_trace lockdep_init_trace = { 443 - .max_entries = ARRAY_SIZE(lockdep_init_trace_data), 444 - .entries = lockdep_init_trace_data, 445 - }; 446 - 447 - /* 448 437 * Various lockdep statistics: 449 438 */ 450 439 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); ··· 653 668 struct lockdep_subclass_key *key; 654 669 struct hlist_head *hash_head; 655 670 struct lock_class *class; 656 - 657 - #ifdef CONFIG_DEBUG_LOCKDEP 658 - /* 659 - * If the architecture calls into lockdep before initializing 660 - * the hashes then we'll warn about it later. 
(we cannot printk 661 - * right now) 662 - */ 663 - if (unlikely(!lockdep_initialized)) { 664 - lockdep_init(); 665 - lockdep_init_error = 1; 666 - lock_init_error = lock->name; 667 - save_stack_trace(&lockdep_init_trace); 668 - } 669 - #endif 670 671 671 672 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { 672 673 debug_locks_off(); ··· 1982 2011 } 1983 2012 1984 2013 /* 2014 + * Returns the index of the first held_lock of the current chain 2015 + */ 2016 + static inline int get_first_held_lock(struct task_struct *curr, 2017 + struct held_lock *hlock) 2018 + { 2019 + int i; 2020 + struct held_lock *hlock_curr; 2021 + 2022 + for (i = curr->lockdep_depth - 1; i >= 0; i--) { 2023 + hlock_curr = curr->held_locks + i; 2024 + if (hlock_curr->irq_context != hlock->irq_context) 2025 + break; 2026 + 2027 + } 2028 + 2029 + return ++i; 2030 + } 2031 + 2032 + /* 2033 + * Checks whether the chain and the current held locks are consistent 2034 + * in depth and also in content. If they are not it most likely means 2035 + * that there was a collision during the calculation of the chain_key. 2036 + * Returns: 0 not passed, 1 passed 2037 + */ 2038 + static int check_no_collision(struct task_struct *curr, 2039 + struct held_lock *hlock, 2040 + struct lock_chain *chain) 2041 + { 2042 + #ifdef CONFIG_DEBUG_LOCKDEP 2043 + int i, j, id; 2044 + 2045 + i = get_first_held_lock(curr, hlock); 2046 + 2047 + if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) 2048 + return 0; 2049 + 2050 + for (j = 0; j < chain->depth - 1; j++, i++) { 2051 + id = curr->held_locks[i].class_idx - 1; 2052 + 2053 + if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) 2054 + return 0; 2055 + } 2056 + #endif 2057 + return 1; 2058 + } 2059 + 2060 + /* 1985 2061 * Look up a dependency chain. If the key is not present yet then 1986 2062 * add it and return 1 - in this case the new dependency chain is 1987 2063 * validated. If the key is already hashed, return 0. 
··· 2041 2023 struct lock_class *class = hlock_class(hlock); 2042 2024 struct hlist_head *hash_head = chainhashentry(chain_key); 2043 2025 struct lock_chain *chain; 2044 - struct held_lock *hlock_curr; 2045 2026 int i, j; 2046 2027 2047 2028 /* ··· 2058 2041 if (chain->chain_key == chain_key) { 2059 2042 cache_hit: 2060 2043 debug_atomic_inc(chain_lookup_hits); 2044 + if (!check_no_collision(curr, hlock, chain)) 2045 + return 0; 2046 + 2061 2047 if (very_verbose(class)) 2062 2048 printk("\nhash chain already cached, key: " 2063 2049 "%016Lx tail class: [%p] %s\n", ··· 2098 2078 chain = lock_chains + nr_lock_chains++; 2099 2079 chain->chain_key = chain_key; 2100 2080 chain->irq_context = hlock->irq_context; 2101 - /* Find the first held_lock of current chain */ 2102 - for (i = curr->lockdep_depth - 1; i >= 0; i--) { 2103 - hlock_curr = curr->held_locks + i; 2104 - if (hlock_curr->irq_context != hlock->irq_context) 2105 - break; 2106 - } 2107 - i++; 2081 + i = get_first_held_lock(curr, hlock); 2108 2082 chain->depth = curr->lockdep_depth + 1 - i; 2109 2083 if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { 2110 2084 chain->base = nr_chain_hlocks; ··· 2186 2172 { 2187 2173 #ifdef CONFIG_DEBUG_LOCKDEP 2188 2174 struct held_lock *hlock, *prev_hlock = NULL; 2189 - unsigned int i, id; 2175 + unsigned int i; 2190 2176 u64 chain_key = 0; 2191 2177 2192 2178 for (i = 0; i < curr->lockdep_depth; i++) { ··· 2203 2189 (unsigned long long)hlock->prev_chain_key); 2204 2190 return; 2205 2191 } 2206 - id = hlock->class_idx - 1; 2207 2192 /* 2208 2193 * Whoops ran out of static storage again? 
2209 2194 */ 2210 - if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) 2195 + if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS)) 2211 2196 return; 2212 2197 2213 2198 if (prev_hlock && (prev_hlock->irq_context != 2214 2199 hlock->irq_context)) 2215 2200 chain_key = 0; 2216 - chain_key = iterate_chain_key(chain_key, id); 2201 + chain_key = iterate_chain_key(chain_key, hlock->class_idx); 2217 2202 prev_hlock = hlock; 2218 2203 } 2219 2204 if (chain_key != curr->curr_chain_key) { ··· 3090 3077 struct task_struct *curr = current; 3091 3078 struct lock_class *class = NULL; 3092 3079 struct held_lock *hlock; 3093 - unsigned int depth, id; 3080 + unsigned int depth; 3094 3081 int chain_head = 0; 3095 3082 int class_idx; 3096 3083 u64 chain_key; ··· 3193 3180 * The 'key ID' is what is the most compact key value to drive 3194 3181 * the hash, not class->key. 3195 3182 */ 3196 - id = class - lock_classes; 3197 3183 /* 3198 3184 * Whoops, we did it again.. ran straight out of our static allocation. 
3199 3185 */ 3200 - if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) 3186 + if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS)) 3201 3187 return 0; 3202 3188 3203 3189 chain_key = curr->curr_chain_key; ··· 3214 3202 chain_key = 0; 3215 3203 chain_head = 1; 3216 3204 } 3217 - chain_key = iterate_chain_key(chain_key, id); 3205 + chain_key = iterate_chain_key(chain_key, class_idx); 3218 3206 3219 3207 if (nest_lock && !__lock_is_held(nest_lock)) 3220 3208 return print_lock_nested_lock_not_held(curr, hlock, ip); ··· 4025 4013 raw_local_irq_restore(flags); 4026 4014 } 4027 4015 4028 - void lockdep_init(void) 4029 - { 4030 - int i; 4031 - 4032 - /* 4033 - * Some architectures have their own start_kernel() 4034 - * code which calls lockdep_init(), while we also 4035 - * call lockdep_init() from the start_kernel() itself, 4036 - * and we want to initialize the hashes only once: 4037 - */ 4038 - if (lockdep_initialized) 4039 - return; 4040 - 4041 - for (i = 0; i < CLASSHASH_SIZE; i++) 4042 - INIT_HLIST_HEAD(classhash_table + i); 4043 - 4044 - for (i = 0; i < CHAINHASH_SIZE; i++) 4045 - INIT_HLIST_HEAD(chainhash_table + i); 4046 - 4047 - lockdep_initialized = 1; 4048 - } 4049 - 4050 4016 void __init lockdep_info(void) 4051 4017 { 4052 4018 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); ··· 4051 4061 4052 4062 printk(" per task-struct memory footprint: %lu bytes\n", 4053 4063 sizeof(struct held_lock) * MAX_LOCK_DEPTH); 4054 - 4055 - #ifdef CONFIG_DEBUG_LOCKDEP 4056 - if (lockdep_init_error) { 4057 - printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error); 4058 - printk("Call stack leading to lockdep invocation was:\n"); 4059 - print_stack_trace(&lockdep_init_trace, 0); 4060 - } 4061 - #endif 4062 4064 } 4063 4065 4064 4066 static void
+7 -1
kernel/locking/mcs_spinlock.h
··· 67 67 node->locked = 0; 68 68 node->next = NULL; 69 69 70 - prev = xchg_acquire(lock, node); 70 + /* 71 + * We rely on the full barrier with global transitivity implied by the 72 + * below xchg() to order the initialization stores above against any 73 + * observation of @node. And to provide the ACQUIRE ordering associated 74 + * with a LOCK primitive. 75 + */ 76 + prev = xchg(lock, node); 71 77 if (likely(prev == NULL)) { 72 78 /* 73 79 * Lock acquired, don't need to set node->locked to 1. Threads
+3 -2
kernel/locking/mutex.c
··· 716 716 __mutex_unlock_common_slowpath(struct mutex *lock, int nested) 717 717 { 718 718 unsigned long flags; 719 + WAKE_Q(wake_q); 719 720 720 721 /* 721 722 * As a performance measurement, release the lock before doing other ··· 744 743 struct mutex_waiter, list); 745 744 746 745 debug_mutex_wake_waiter(lock, waiter); 747 - 748 - wake_up_process(waiter->task); 746 + wake_q_add(&wake_q, waiter->task); 749 747 } 750 748 751 749 spin_unlock_mutex(&lock->wait_lock, flags); 750 + wake_up_q(&wake_q); 752 751 } 753 752 754 753 /*
+3 -4
kernel/locking/qspinlock.c
··· 358 358 * sequentiality; this is because not all clear_pending_set_locked() 359 359 * implementations imply full barriers. 360 360 */ 361 - while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK) 362 - cpu_relax(); 361 + smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK)); 363 362 364 363 /* 365 364 * take ownership and clear the pending bit. ··· 434 435 * 435 436 * The PV pv_wait_head_or_lock function, if active, will acquire 436 437 * the lock and return a non-zero value. So we have to skip the 437 - * smp_load_acquire() call. As the next PV queue head hasn't been 438 + * smp_cond_acquire() call. As the next PV queue head hasn't been 438 439 * designated yet, there is no way for the locked value to become 439 440 * _Q_SLOW_VAL. So both the set_locked() and the 440 441 * atomic_cmpxchg_relaxed() calls will be safe. ··· 465 466 break; 466 467 } 467 468 /* 468 - * The smp_load_acquire() call above has provided the necessary 469 + * The smp_cond_acquire() call above has provided the necessary 469 470 * acquire semantics required for locking. At most two 470 471 * iterations of this loop may be ran. 471 472 */
+14 -7
kernel/locking/qspinlock_paravirt.h
··· 55 55 }; 56 56 57 57 /* 58 + * Include queued spinlock statistics code 59 + */ 60 + #include "qspinlock_stat.h" 61 + 62 + /* 58 63 * By replacing the regular queued_spin_trylock() with the function below, 59 64 * it will be called once when a lock waiter enter the PV slowpath before 60 65 * being queued. By allowing one lock stealing attempt here when the pending ··· 70 65 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) 71 66 { 72 67 struct __qspinlock *l = (void *)lock; 68 + int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && 69 + (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0); 73 70 74 - return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && 75 - (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0); 71 + qstat_inc(qstat_pv_lock_stealing, ret); 72 + return ret; 76 73 } 77 74 78 75 /* ··· 143 136 return 0; 144 137 } 145 138 #endif /* _Q_PENDING_BITS == 8 */ 146 - 147 - /* 148 - * Include queued spinlock statistics code 149 - */ 150 - #include "qspinlock_stat.h" 151 139 152 140 /* 153 141 * Lock and MCS node addresses hash table for fast lookup ··· 399 397 */ 400 398 if (READ_ONCE(pn->state) == vcpu_hashed) 401 399 lp = (struct qspinlock **)1; 400 + 401 + /* 402 + * Tracking # of slowpath locking operations 403 + */ 404 + qstat_inc(qstat_pv_lock_slowpath, true); 402 405 403 406 for (;; waitcnt++) { 404 407 /*
+3 -13
kernel/locking/qspinlock_stat.h
··· 22 22 * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake 23 23 * pv_latency_kick - average latency (ns) of vCPU kick operation 24 24 * pv_latency_wake - average latency (ns) from vCPU kick to wakeup 25 + * pv_lock_slowpath - # of locking operations via the slowpath 25 26 * pv_lock_stealing - # of lock stealing operations 26 27 * pv_spurious_wakeup - # of spurious wakeups 27 28 * pv_wait_again - # of vCPU wait's that happened after a vCPU kick ··· 46 45 qstat_pv_kick_wake, 47 46 qstat_pv_latency_kick, 48 47 qstat_pv_latency_wake, 48 + qstat_pv_lock_slowpath, 49 49 qstat_pv_lock_stealing, 50 50 qstat_pv_spurious_wakeup, 51 51 qstat_pv_wait_again, ··· 72 70 [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", 73 71 [qstat_pv_latency_kick] = "pv_latency_kick", 74 72 [qstat_pv_latency_wake] = "pv_latency_wake", 73 + [qstat_pv_lock_slowpath] = "pv_lock_slowpath", 75 74 [qstat_pv_lock_stealing] = "pv_lock_stealing", 76 75 [qstat_pv_wait_again] = "pv_wait_again", 77 76 [qstat_pv_wait_early] = "pv_wait_early", ··· 281 278 282 279 #define pv_kick(c) __pv_kick(c) 283 280 #define pv_wait(p, v) __pv_wait(p, v) 284 - 285 - /* 286 - * PV unfair trylock count tracking function 287 - */ 288 - static inline int qstat_spin_steal_lock(struct qspinlock *lock) 289 - { 290 - int ret = pv_queued_spin_steal_lock(lock); 291 - 292 - qstat_inc(qstat_pv_lock_stealing, ret); 293 - return ret; 294 - } 295 - #undef queued_spin_trylock 296 - #define queued_spin_trylock(l) qstat_spin_steal_lock(l) 297 281 298 282 #else /* CONFIG_QUEUED_LOCK_STAT */ 299 283
+4 -5
kernel/smp.c
··· 105 105 * previous function call. For multi-cpu calls its even more interesting 106 106 * as we'll have to ensure no other cpu is observing our csd. 107 107 */ 108 - static void csd_lock_wait(struct call_single_data *csd) 108 + static __always_inline void csd_lock_wait(struct call_single_data *csd) 109 109 { 110 - while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK) 111 - cpu_relax(); 110 + smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK)); 112 111 } 113 112 114 - static void csd_lock(struct call_single_data *csd) 113 + static __always_inline void csd_lock(struct call_single_data *csd) 115 114 { 116 115 csd_lock_wait(csd); 117 116 csd->flags |= CSD_FLAG_LOCK; ··· 123 124 smp_wmb(); 124 125 } 125 126 126 - static void csd_unlock(struct call_single_data *csd) 127 + static __always_inline void csd_unlock(struct call_single_data *csd) 127 128 { 128 129 WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); 129 130
+42 -20
lib/test_static_keys.c
··· 46 46 bool (*test_key)(void); 47 47 }; 48 48 49 - #define test_key_func(key, branch) \ 50 - ({bool func(void) { return branch(key); } func; }) 49 + #define test_key_func(key, branch) \ 50 + static bool key ## _ ## branch(void) \ 51 + { \ 52 + return branch(&key); \ 53 + } 51 54 52 55 static void invert_key(struct static_key *key) 53 56 { ··· 95 92 return 0; 96 93 } 97 94 95 + test_key_func(old_true_key, static_key_true) 96 + test_key_func(old_false_key, static_key_false) 97 + test_key_func(true_key, static_branch_likely) 98 + test_key_func(true_key, static_branch_unlikely) 99 + test_key_func(false_key, static_branch_likely) 100 + test_key_func(false_key, static_branch_unlikely) 101 + test_key_func(base_old_true_key, static_key_true) 102 + test_key_func(base_inv_old_true_key, static_key_true) 103 + test_key_func(base_old_false_key, static_key_false) 104 + test_key_func(base_inv_old_false_key, static_key_false) 105 + test_key_func(base_true_key, static_branch_likely) 106 + test_key_func(base_true_key, static_branch_unlikely) 107 + test_key_func(base_inv_true_key, static_branch_likely) 108 + test_key_func(base_inv_true_key, static_branch_unlikely) 109 + test_key_func(base_false_key, static_branch_likely) 110 + test_key_func(base_false_key, static_branch_unlikely) 111 + test_key_func(base_inv_false_key, static_branch_likely) 112 + test_key_func(base_inv_false_key, static_branch_unlikely) 113 + 98 114 static int __init test_static_key_init(void) 99 115 { 100 116 int ret; ··· 124 102 { 125 103 .init_state = true, 126 104 .key = &old_true_key, 127 - .test_key = test_key_func(&old_true_key, static_key_true), 105 + .test_key = &old_true_key_static_key_true, 128 106 }, 129 107 { 130 108 .init_state = false, 131 109 .key = &old_false_key, 132 - .test_key = test_key_func(&old_false_key, static_key_false), 110 + .test_key = &old_false_key_static_key_false, 133 111 }, 134 112 /* internal keys - new keys */ 135 113 { 136 114 .init_state = true, 137 115 .key = &true_key.key, 
138 - .test_key = test_key_func(&true_key, static_branch_likely), 116 + .test_key = &true_key_static_branch_likely, 139 117 }, 140 118 { 141 119 .init_state = true, 142 120 .key = &true_key.key, 143 - .test_key = test_key_func(&true_key, static_branch_unlikely), 121 + .test_key = &true_key_static_branch_unlikely, 144 122 }, 145 123 { 146 124 .init_state = false, 147 125 .key = &false_key.key, 148 - .test_key = test_key_func(&false_key, static_branch_likely), 126 + .test_key = &false_key_static_branch_likely, 149 127 }, 150 128 { 151 129 .init_state = false, 152 130 .key = &false_key.key, 153 - .test_key = test_key_func(&false_key, static_branch_unlikely), 131 + .test_key = &false_key_static_branch_unlikely, 154 132 }, 155 133 /* external keys - old keys */ 156 134 { 157 135 .init_state = true, 158 136 .key = &base_old_true_key, 159 - .test_key = test_key_func(&base_old_true_key, static_key_true), 137 + .test_key = &base_old_true_key_static_key_true, 160 138 }, 161 139 { 162 140 .init_state = false, 163 141 .key = &base_inv_old_true_key, 164 - .test_key = test_key_func(&base_inv_old_true_key, static_key_true), 142 + .test_key = &base_inv_old_true_key_static_key_true, 165 143 }, 166 144 { 167 145 .init_state = false, 168 146 .key = &base_old_false_key, 169 - .test_key = test_key_func(&base_old_false_key, static_key_false), 147 + .test_key = &base_old_false_key_static_key_false, 170 148 }, 171 149 { 172 150 .init_state = true, 173 151 .key = &base_inv_old_false_key, 174 - .test_key = test_key_func(&base_inv_old_false_key, static_key_false), 152 + .test_key = &base_inv_old_false_key_static_key_false, 175 153 }, 176 154 /* external keys - new keys */ 177 155 { 178 156 .init_state = true, 179 157 .key = &base_true_key.key, 180 - .test_key = test_key_func(&base_true_key, static_branch_likely), 158 + .test_key = &base_true_key_static_branch_likely, 181 159 }, 182 160 { 183 161 .init_state = true, 184 162 .key = &base_true_key.key, 185 - .test_key = 
test_key_func(&base_true_key, static_branch_unlikely), 163 + .test_key = &base_true_key_static_branch_unlikely, 186 164 }, 187 165 { 188 166 .init_state = false, 189 167 .key = &base_inv_true_key.key, 190 - .test_key = test_key_func(&base_inv_true_key, static_branch_likely), 168 + .test_key = &base_inv_true_key_static_branch_likely, 191 169 }, 192 170 { 193 171 .init_state = false, 194 172 .key = &base_inv_true_key.key, 195 - .test_key = test_key_func(&base_inv_true_key, static_branch_unlikely), 173 + .test_key = &base_inv_true_key_static_branch_unlikely, 196 174 }, 197 175 { 198 176 .init_state = false, 199 177 .key = &base_false_key.key, 200 - .test_key = test_key_func(&base_false_key, static_branch_likely), 178 + .test_key = &base_false_key_static_branch_likely, 201 179 }, 202 180 { 203 181 .init_state = false, 204 182 .key = &base_false_key.key, 205 - .test_key = test_key_func(&base_false_key, static_branch_unlikely), 183 + .test_key = &base_false_key_static_branch_unlikely, 206 184 }, 207 185 { 208 186 .init_state = true, 209 187 .key = &base_inv_false_key.key, 210 - .test_key = test_key_func(&base_inv_false_key, static_branch_likely), 188 + .test_key = &base_inv_false_key_static_branch_likely, 211 189 }, 212 190 { 213 191 .init_state = true, 214 192 .key = &base_inv_false_key.key, 215 - .test_key = test_key_func(&base_inv_false_key, static_branch_unlikely), 193 + .test_key = &base_inv_false_key_static_branch_unlikely, 216 194 }, 217 195 }; 218 196
+1 -1
tools/lib/lockdep/Makefile
··· 100 100 101 101 do_compile_shared_library = \ 102 102 ($(print_shared_lib_compile) \ 103 - $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so)) 103 + $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so)) 104 104 105 105 do_build_static_lib = \ 106 106 ($(print_static_lib_build) \
-5
tools/lib/lockdep/common.c
··· 11 11 bool debug_locks = true; 12 12 bool debug_locks_silent; 13 13 14 - __attribute__((constructor)) static void liblockdep_init(void) 15 - { 16 - lockdep_init(); 17 - } 18 - 19 14 __attribute__((destructor)) static void liblockdep_exit(void) 20 15 { 21 16 debug_check_no_locks_held();
-1
tools/lib/lockdep/include/liblockdep/common.h
··· 44 44 void lock_release(struct lockdep_map *lock, int nested, 45 45 unsigned long ip); 46 46 extern void debug_check_no_locks_freed(const void *from, unsigned long len); 47 - extern void lockdep_init(void); 48 47 49 48 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ 50 49 { .name = (_name), .key = (void *)(_key), }
+6
tools/lib/lockdep/lockdep.c
··· 1 1 #include <linux/lockdep.h> 2 + 3 + /* Trivial API wrappers, we don't (yet) have RCU in user-space: */ 4 + #define hlist_for_each_entry_rcu hlist_for_each_entry 5 + #define hlist_add_head_rcu hlist_add_head 6 + #define hlist_del_rcu hlist_del 7 + 2 8 #include "../../../kernel/locking/lockdep.c"
-2
tools/lib/lockdep/preload.c
··· 439 439 ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); 440 440 #endif 441 441 442 - lockdep_init(); 443 - 444 442 __init_state = done; 445 443 }
+4 -4
tools/lib/lockdep/tests/AA.c
··· 1 1 #include <liblockdep/mutex.h> 2 2 3 - void main(void) 3 + int main(void) 4 4 { 5 - pthread_mutex_t a, b; 5 + pthread_mutex_t a; 6 6 7 7 pthread_mutex_init(&a, NULL); 8 - pthread_mutex_init(&b, NULL); 9 8 10 9 pthread_mutex_lock(&a); 11 - pthread_mutex_lock(&b); 12 10 pthread_mutex_lock(&a); 11 + 12 + return 0; 13 13 }
+13
tools/lib/lockdep/tests/ABA.c
··· 1 + #include <liblockdep/mutex.h> 2 + 3 + void main(void) 4 + { 5 + pthread_mutex_t a, b; 6 + 7 + pthread_mutex_init(&a, NULL); 8 + pthread_mutex_init(&b, NULL); 9 + 10 + pthread_mutex_lock(&a); 11 + pthread_mutex_lock(&b); 12 + pthread_mutex_lock(&a); 13 + }
+46
tools/lib/lockdep/tests/ABBA_2threads.c
··· 1 + #include <stdio.h> 2 + #include <pthread.h> 3 + 4 + pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; 5 + pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER; 6 + pthread_barrier_t bar; 7 + 8 + void *ba_lock(void *arg) 9 + { 10 + int ret, i; 11 + 12 + pthread_mutex_lock(&b); 13 + 14 + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) 15 + pthread_barrier_destroy(&bar); 16 + 17 + pthread_mutex_lock(&a); 18 + 19 + pthread_mutex_unlock(&a); 20 + pthread_mutex_unlock(&b); 21 + } 22 + 23 + int main(void) 24 + { 25 + pthread_t t; 26 + 27 + pthread_barrier_init(&bar, NULL, 2); 28 + 29 + if (pthread_create(&t, NULL, ba_lock, NULL)) { 30 + fprintf(stderr, "pthread_create() failed\n"); 31 + return 1; 32 + } 33 + pthread_mutex_lock(&a); 34 + 35 + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) 36 + pthread_barrier_destroy(&bar); 37 + 38 + pthread_mutex_lock(&b); 39 + 40 + pthread_mutex_unlock(&b); 41 + pthread_mutex_unlock(&a); 42 + 43 + pthread_join(t, NULL); 44 + 45 + return 0; 46 + }
+1
tools/lib/lockdep/uinclude/linux/compiler.h
··· 3 3 4 4 #define __used __attribute__((__unused__)) 5 5 #define unlikely 6 + #define READ_ONCE(x) (x) 6 7 #define WRITE_ONCE(x, val) x=(val) 7 8 #define RCU_INIT_POINTER(p, v) p=(v) 8 9