Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

seqcount: Add lockdep functionality to seqcount/seqlock structures

Currently seqlocks and seqcounts don't support lockdep.

After running across a seqcount related deadlock in the timekeeping
code, I used a less-refined and more focused variant of this patch
to narrow down the cause of the issue.

This is a first-pass attempt to properly enable lockdep functionality
on seqlocks and seqcounts.

Since seqcounts are used in the vdso gettimeofday code, I've provided
non-lockdep accessors for those needs.

I've also handled one case where there were nested seqlock writers,
and there may be more edge cases.

Comments and feedback would be appreciated!

Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/1381186321-4906-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by John Stultz; committed by Ingo Molnar.
1ca7d67c 827da44c

+90 -21
+4 -4
arch/x86/vdso/vclock_gettime.c
··· 178 178 179 179 ts->tv_nsec = 0; 180 180 do { 181 - seq = read_seqcount_begin(&gtod->seq); 181 + seq = read_seqcount_begin_no_lockdep(&gtod->seq); 182 182 mode = gtod->clock.vclock_mode; 183 183 ts->tv_sec = gtod->wall_time_sec; 184 184 ns = gtod->wall_time_snsec; ··· 198 198 199 199 ts->tv_nsec = 0; 200 200 do { 201 - seq = read_seqcount_begin(&gtod->seq); 201 + seq = read_seqcount_begin_no_lockdep(&gtod->seq); 202 202 mode = gtod->clock.vclock_mode; 203 203 ts->tv_sec = gtod->monotonic_time_sec; 204 204 ns = gtod->monotonic_time_snsec; ··· 214 214 { 215 215 unsigned long seq; 216 216 do { 217 - seq = read_seqcount_begin(&gtod->seq); 217 + seq = read_seqcount_begin_no_lockdep(&gtod->seq); 218 218 ts->tv_sec = gtod->wall_time_coarse.tv_sec; 219 219 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; 220 220 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); ··· 225 225 { 226 226 unsigned long seq; 227 227 do { 228 - seq = read_seqcount_begin(&gtod->seq); 228 + seq = read_seqcount_begin_no_lockdep(&gtod->seq); 229 229 ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; 230 230 ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; 231 231 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+2 -2
fs/dcache.c
··· 2574 2574 dentry_lock_for_move(dentry, target); 2575 2575 2576 2576 write_seqcount_begin(&dentry->d_seq); 2577 - write_seqcount_begin(&target->d_seq); 2577 + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); 2578 2578 2579 2579 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ 2580 2580 ··· 2706 2706 dentry_lock_for_move(anon, dentry); 2707 2707 2708 2708 write_seqcount_begin(&dentry->d_seq); 2709 - write_seqcount_begin(&anon->d_seq); 2709 + write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); 2710 2710 2711 2711 dparent = dentry->d_parent; 2712 2712
+1 -1
fs/fs_struct.c
··· 161 161 struct fs_struct init_fs = { 162 162 .users = 1, 163 163 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), 164 - .seq = SEQCNT_ZERO, 164 + .seq = SEQCNT_ZERO(init_fs.seq), 165 165 .umask = 0022, 166 166 };
+4 -4
include/linux/init_task.h
··· 32 32 #endif 33 33 34 34 #ifdef CONFIG_CPUSETS 35 - #define INIT_CPUSET_SEQ \ 36 - .mems_allowed_seq = SEQCNT_ZERO, 35 + #define INIT_CPUSET_SEQ(tsk) \ 36 + .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), 37 37 #else 38 - #define INIT_CPUSET_SEQ 38 + #define INIT_CPUSET_SEQ(tsk) 39 39 #endif 40 40 41 41 #define INIT_SIGNALS(sig) { \ ··· 220 220 INIT_FTRACE_GRAPH \ 221 221 INIT_TRACE_RECURSION \ 222 222 INIT_TASK_RCU_PREEMPT(tsk) \ 223 - INIT_CPUSET_SEQ \ 223 + INIT_CPUSET_SEQ(tsk) \ 224 224 INIT_VTIME(tsk) \ 225 225 } 226 226
+6 -2
include/linux/lockdep.h
··· 497 497 #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) 498 498 #define rwlock_release(l, n, i) lock_release(l, n, i) 499 499 500 + #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) 501 + #define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) 502 + #define seqcount_release(l, n, i) lock_release(l, n, i) 503 + 500 504 #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) 501 505 #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) 502 506 #define mutex_release(l, n, i) lock_release(l, n, i) ··· 508 504 #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) 509 505 #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) 510 506 #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) 511 - # define rwsem_release(l, n, i) lock_release(l, n, i) 507 + #define rwsem_release(l, n, i) lock_release(l, n, i) 512 508 513 509 #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) 514 510 #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) 515 - # define lock_map_release(l) lock_release(l, 1, _THIS_IP_) 511 + #define lock_map_release(l) lock_release(l, 1, _THIS_IP_) 516 512 517 513 #ifdef CONFIG_PROVE_LOCKING 518 514 # define might_lock(lock) \
+72 -7
include/linux/seqlock.h
··· 34 34 35 35 #include <linux/spinlock.h> 36 36 #include <linux/preempt.h> 37 + #include <linux/lockdep.h> 37 38 #include <asm/processor.h> 38 39 39 40 /* ··· 45 44 */ 46 45 typedef struct seqcount { 47 46 unsigned sequence; 47 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 48 + struct lockdep_map dep_map; 49 + #endif 48 50 } seqcount_t; 49 51 50 - #define SEQCNT_ZERO { 0 } 51 - #define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0) 52 + static inline void __seqcount_init(seqcount_t *s, const char *name, 53 + struct lock_class_key *key) 54 + { 55 + /* 56 + * Make sure we are not reinitializing a held lock: 57 + */ 58 + lockdep_init_map(&s->dep_map, name, key, 0); 59 + s->sequence = 0; 60 + } 61 + 62 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 63 + # define SEQCOUNT_DEP_MAP_INIT(lockname) \ 64 + .dep_map = { .name = #lockname } \ 65 + 66 + # define seqcount_init(s) \ 67 + do { \ 68 + static struct lock_class_key __key; \ 69 + __seqcount_init((s), #s, &__key); \ 70 + } while (0) 71 + 72 + static inline void seqcount_lockdep_reader_access(const seqcount_t *s) 73 + { 74 + seqcount_t *l = (seqcount_t *)s; 75 + unsigned long flags; 76 + 77 + local_irq_save(flags); 78 + seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); 79 + seqcount_release(&l->dep_map, 1, _RET_IP_); 80 + local_irq_restore(flags); 81 + } 82 + 83 + #else 84 + # define SEQCOUNT_DEP_MAP_INIT(lockname) 85 + # define seqcount_init(s) __seqcount_init(s, NULL, NULL) 86 + # define seqcount_lockdep_reader_access(x) 87 + #endif 88 + 89 + #define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} 90 + 52 91 53 92 /** 54 93 * __read_seqcount_begin - begin a seq-read critical section (without barrier) ··· 117 76 } 118 77 119 78 /** 79 + * read_seqcount_begin_no_lockdep - start seq-read critical section w/o lockdep 80 + * @s: pointer to seqcount_t 81 + * Returns: count to be passed to read_seqcount_retry 82 + * 83 + * read_seqcount_begin_no_lockdep opens a read critical section of the given 84 + * seqcount, but without any lockdep checking. Validity of the critical 85 + * section is tested by checking read_seqcount_retry function. 86 + */ 87 + static inline unsigned read_seqcount_begin_no_lockdep(const seqcount_t *s) 88 + { 89 + unsigned ret = __read_seqcount_begin(s); 90 + smp_rmb(); 91 + return ret; 92 + } 93 + 94 + /** 120 95 * read_seqcount_begin - begin a seq-read critical section 121 96 * @s: pointer to seqcount_t 122 97 * Returns: count to be passed to read_seqcount_retry ··· 143 86 */ 144 87 static inline unsigned read_seqcount_begin(const seqcount_t *s) 145 88 { 146 - unsigned ret = __read_seqcount_begin(s); 147 - smp_rmb(); 148 - return ret; 89 + seqcount_lockdep_reader_access(s); 90 + return read_seqcount_begin_no_lockdep(s); 149 91 } 150 92 151 93 /** ··· 164 108 static inline unsigned raw_seqcount_begin(const seqcount_t *s) 165 109 { 166 110 unsigned ret = ACCESS_ONCE(s->sequence); 111 + 112 + seqcount_lockdep_reader_access(s); 167 113 smp_rmb(); 168 114 return ret & ~1; 169 115 } ··· 210 152 * Sequence counter only version assumes that callers are using their 211 153 * own mutexing. 212 154 */ 213 - static inline void write_seqcount_begin(seqcount_t *s) 155 + static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) 214 156 { 215 157 s->sequence++; 216 158 smp_wmb(); 159 + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); 160 + } 161 + 162 + static inline void write_seqcount_begin(seqcount_t *s) 163 + { 164 + write_seqcount_begin_nested(s, 0); 217 165 } 218 166 219 167 static inline void write_seqcount_end(seqcount_t *s) 220 168 { 169 + seqcount_release(&s->dep_map, 1, _RET_IP_); 221 170 smp_wmb(); 222 171 s->sequence++; 223 172 } ··· 253 188 */ 254 189 #define __SEQLOCK_UNLOCKED(lockname) \ 255 190 { \ 256 - .seqcount = SEQCNT_ZERO, \ 191 + .seqcount = SEQCNT_ZERO(lockname), \ 257 192 .lock = __SPIN_LOCK_UNLOCKED(lockname) \ 258 193 }
+1 -1
mm/filemap_xip.c
··· 26 26 * of ZERO_PAGE(), such as /dev/zero 27 27 */ 28 28 static DEFINE_MUTEX(xip_sparse_mutex); 29 - static seqcount_t xip_sparse_seq = SEQCNT_ZERO; 29 + static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq); 30 30 static struct page *__xip_sparse_page; 31 31 32 32 /* called under xip_sparse_mutex */