Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

locking/lockdep: Make class->ops a percpu counter and move it under CONFIG_DEBUG_LOCKDEP=y

A sizable portion of the CPU cycles spent in __lock_acquire() is used
up by the atomic increment of the class->ops stat counter. By taking it out
of the lock_class structure and changing it to a per-cpu per-lock-class
counter, we can reduce the amount of cacheline contention on the class
structure when multiple CPUs are trying to acquire locks of the same
class simultaneously.

To limit the increase in memory consumption because of the percpu nature
of that counter, it is now put back under the CONFIG_DEBUG_LOCKDEP
config option. So the memory consumption increase will only occur if
CONFIG_DEBUG_LOCKDEP is defined. The lock_class structure, however,
is reduced in size by 16 bytes on 64-bit archs after ops removal and
a minor restructuring of the fields.

This patch also fixes a bug in the increment code: the counter is of
type 'unsigned long', but atomic_inc() was used to increment it.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/d66681f3-8781-9793-1dcf-2436a284550b@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Waiman Long and committed by
Ingo Molnar
8ca2b56c ce52a18d

+37 -10
+1 -6
include/linux/lockdep.h
··· 99 99 */ 100 100 unsigned int version; 101 101 102 - /* 103 - * Statistics counter: 104 - */ 105 - unsigned long ops; 106 - 107 - const char *name; 108 102 int name_version; 103 + const char *name; 109 104 110 105 #ifdef CONFIG_LOCK_STAT 111 106 unsigned long contention_point[LOCKSTAT_POINTS];
+8 -3
kernel/locking/lockdep.c
··· 139 139 * get freed - this significantly simplifies the debugging code. 140 140 */ 141 141 unsigned long nr_lock_classes; 142 - static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 142 + struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 143 143 144 144 static inline struct lock_class *hlock_class(struct held_lock *hlock) 145 145 { ··· 436 436 * Various lockdep statistics: 437 437 */ 438 438 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); 439 + DEFINE_PER_CPU(unsigned long [MAX_LOCKDEP_KEYS], lock_class_ops); 439 440 #endif 440 441 441 442 /* ··· 1393 1392 1394 1393 printk("%*s->", depth, ""); 1395 1394 print_lock_name(class); 1396 - printk(KERN_CONT " ops: %lu", class->ops); 1395 + #ifdef CONFIG_DEBUG_LOCKDEP 1396 + printk(KERN_CONT " ops: %lu", debug_class_ops_read(class)); 1397 + #endif 1397 1398 printk(KERN_CONT " {\n"); 1398 1399 1399 1400 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { ··· 3230 3227 if (!class) 3231 3228 return 0; 3232 3229 } 3233 - atomic_inc((atomic_t *)&class->ops); 3230 + 3231 + debug_class_ops_inc(class); 3232 + 3234 3233 if (very_verbose(class)) { 3235 3234 printk("\nacquire class [%px] %s", class->key, class->name); 3236 3235 if (class->name_version > 1)
+27
kernel/locking/lockdep_internals.h
··· 152 152 int nr_find_usage_forwards_recursions; 153 153 int nr_find_usage_backwards_checks; 154 154 int nr_find_usage_backwards_recursions; 155 + 156 + /* 157 + * Per lock class locking operation stat counts 158 + */ 159 + unsigned long lock_class_ops[MAX_LOCKDEP_KEYS]; 155 160 }; 156 161 157 162 DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); 163 + extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; 158 164 159 165 #define __debug_atomic_inc(ptr) \ 160 166 this_cpu_inc(lockdep_stats.ptr); ··· 185 179 } \ 186 180 __total; \ 187 181 }) 182 + 183 + static inline void debug_class_ops_inc(struct lock_class *class) 184 + { 185 + int idx; 186 + 187 + idx = class - lock_classes; 188 + __debug_atomic_inc(lock_class_ops[idx]); 189 + } 190 + 191 + static inline unsigned long debug_class_ops_read(struct lock_class *class) 192 + { 193 + int idx, cpu; 194 + unsigned long ops = 0; 195 + 196 + idx = class - lock_classes; 197 + for_each_possible_cpu(cpu) 198 + ops += per_cpu(lockdep_stats.lock_class_ops[idx], cpu); 199 + return ops; 200 + } 201 + 188 202 #else 189 203 # define __debug_atomic_inc(ptr) do { } while (0) 190 204 # define debug_atomic_inc(ptr) do { } while (0) 191 205 # define debug_atomic_dec(ptr) do { } while (0) 192 206 # define debug_atomic_read(ptr) 0 207 + # define debug_class_ops_inc(ptr) do { } while (0) 193 208 #endif
+1 -1
kernel/locking/lockdep_proc.c
··· 68 68 69 69 seq_printf(m, "%p", class->key); 70 70 #ifdef CONFIG_DEBUG_LOCKDEP 71 - seq_printf(m, " OPS:%8ld", class->ops); 71 + seq_printf(m, " OPS:%8ld", debug_class_ops_read(class)); 72 72 #endif 73 73 #ifdef CONFIG_PROVE_LOCKING 74 74 seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));