Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

klist: don't iterate over deleted entries

A klist entry is kept on the list till all its current iterations are
finished; however, a new iteration after deletion also iterates over
deleted entries as long as their reference count stays above zero.
This causes problems for cases where there are users which iterate
over the list while synchronized against list manipulations and
natuarally expect already deleted entries to not show up during
iteration.

This patch implements dead flag which gets set on deletion so that
iteration can skip already deleted entries. The dead flag piggy backs
on the lowest bit of knode->n_klist and only visible to klist
implementation proper.

While at it, drop klist_iter->i_head as it's redundant and doesn't
offer anything in semantics or performance wise as klist_iter->i_klist
is dereferenced on every iteration anyway.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

authored by

Tejun Heo and committed by
Jens Axboe
a1ed5b0c 710027a4

+74 -31
+1 -2
include/linux/klist.h
··· 38 38 void (*put)(struct klist_node *)); 39 39 40 40 struct klist_node { 41 - struct klist *n_klist; 41 + void *n_klist; /* never access directly */ 42 42 struct list_head n_node; 43 43 struct kref n_ref; 44 44 struct completion n_removed; ··· 57 57 58 58 struct klist_iter { 59 59 struct klist *i_klist; 60 - struct list_head *i_head; 61 60 struct klist_node *i_cur; 62 61 }; 63 62
+73 -29
lib/klist.c
··· 37 37 #include <linux/klist.h> 38 38 #include <linux/module.h> 39 39 40 + /* 41 + * Use the lowest bit of n_klist to mark deleted nodes and exclude 42 + * dead ones from iteration. 43 + */ 44 + #define KNODE_DEAD 1LU 45 + #define KNODE_KLIST_MASK ~KNODE_DEAD 46 + 47 + static struct klist *knode_klist(struct klist_node *knode) 48 + { 49 + return (struct klist *) 50 + ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); 51 + } 52 + 53 + static bool knode_dead(struct klist_node *knode) 54 + { 55 + return (unsigned long)knode->n_klist & KNODE_DEAD; 56 + } 57 + 58 + static void knode_set_klist(struct klist_node *knode, struct klist *klist) 59 + { 60 + knode->n_klist = klist; 61 + /* no knode deserves to start its life dead */ 62 + WARN_ON(knode_dead(knode)); 63 + } 64 + 65 + static void knode_kill(struct klist_node *knode) 66 + { 67 + /* and no knode should die twice ever either, see we're very humane */ 68 + WARN_ON(knode_dead(knode)); 69 + *(unsigned long *)&knode->n_klist |= KNODE_DEAD; 70 + } 40 71 41 72 /** 42 73 * klist_init - Initialize a klist structure. ··· 110 79 INIT_LIST_HEAD(&n->n_node); 111 80 init_completion(&n->n_removed); 112 81 kref_init(&n->n_ref); 113 - n->n_klist = k; 82 + knode_set_klist(n, k); 114 83 if (k->get) 115 84 k->get(n); 116 85 } ··· 146 115 */ 147 116 void klist_add_after(struct klist_node *n, struct klist_node *pos) 148 117 { 149 - struct klist *k = pos->n_klist; 118 + struct klist *k = knode_klist(pos); 150 119 151 120 klist_node_init(k, n); 152 121 spin_lock(&k->k_lock); ··· 162 131 */ 163 132 void klist_add_before(struct klist_node *n, struct klist_node *pos) 164 133 { 165 - struct klist *k = pos->n_klist; 134 + struct klist *k = knode_klist(pos); 166 135 167 136 klist_node_init(k, n); 168 137 spin_lock(&k->k_lock); ··· 175 144 { 176 145 struct klist_node *n = container_of(kref, struct klist_node, n_ref); 177 146 147 + WARN_ON(!knode_dead(n)); 178 148 list_del(&n->n_node); 179 149 complete(&n->n_removed); 180 - n->n_klist = NULL; 150 + knode_set_klist(n, NULL); 181 151 } 182 152 183 153 static int klist_dec_and_del(struct klist_node *n) 184 154 { 185 155 return kref_put(&n->n_ref, klist_release); 156 + } 157 + 158 + static void klist_put(struct klist_node *n, bool kill) 159 + { 160 + struct klist *k = knode_klist(n); 161 + void (*put)(struct klist_node *) = k->put; 162 + 163 + spin_lock(&k->k_lock); 164 + if (kill) 165 + knode_kill(n); 166 + if (!klist_dec_and_del(n)) 167 + put = NULL; 168 + spin_unlock(&k->k_lock); 169 + if (put) 170 + put(n); 186 171 } 187 172 188 173 /** ··· 207 160 */ 208 161 void klist_del(struct klist_node *n) 209 162 { 210 - struct klist *k = n->n_klist; 211 - void (*put)(struct klist_node *) = k->put; 212 - 213 - spin_lock(&k->k_lock); 214 - if (!klist_dec_and_del(n)) 215 - put = NULL; 216 - spin_unlock(&k->k_lock); 217 - if (put) 218 - put(n); 163 + klist_put(n, true); 219 164 } 220 165 EXPORT_SYMBOL_GPL(klist_del); 221 166 ··· 245 206 struct klist_node *n) 246 207 { 247 208 i->i_klist = k; 248 - i->i_head = &k->k_list; 249 209 i->i_cur = n; 250 210 if (n) 251 211 kref_get(&n->n_ref); ··· 275 237 void klist_iter_exit(struct klist_iter *i) 276 238 { 277 239 if (i->i_cur) { 278 - klist_del(i->i_cur); 240 + klist_put(i->i_cur, false); 279 241 i->i_cur = NULL; 280 242 } 281 243 } ··· 296 258 */ 297 259 struct klist_node *klist_next(struct klist_iter *i) 298 260 { 299 - struct list_head *next; 300 - struct klist_node *lnode = i->i_cur; 301 - struct klist_node *knode = NULL; 302 261 void (*put)(struct klist_node *) = i->i_klist->put; 262 + struct klist_node *last = i->i_cur; 263 + struct klist_node *next; 303 264 304 265 spin_lock(&i->i_klist->k_lock); 305 - if (lnode) { 306 - next = lnode->n_node.next; 307 - if (!klist_dec_and_del(lnode)) 266 + 267 + if (last) { 268 + next = to_klist_node(last->n_node.next); 269 + if (!klist_dec_and_del(last)) 308 270 put = NULL; 309 271 } else 310 - next = i->i_head->next; 272 + next = to_klist_node(i->i_klist->k_list.next); 311 273 312 - if (next != i->i_head) { 313 - knode = to_klist_node(next); 314 - kref_get(&knode->n_ref); 274 + i->i_cur = NULL; 275 + while (next != to_klist_node(&i->i_klist->k_list)) { 276 + if (likely(!knode_dead(next))) { 277 + kref_get(&next->n_ref); 278 + i->i_cur = next; 279 + break; 280 + } 281 + next = to_klist_node(next->n_node.next); 315 282 } 316 - i->i_cur = knode; 283 + 317 284 spin_unlock(&i->i_klist->k_lock); 318 - if (put && lnode) 319 - put(lnode); 320 - return knode; 285 + 286 + if (put && last) 287 + put(last); 288 + return i->i_cur; 321 289 } 322 290 EXPORT_SYMBOL_GPL(klist_next);