Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] files: rcuref APIs

Adds a set of primitives to do reference counting for objects that are looked
up without locks using RCU.

Signed-off-by: Ravikiran Thirumalai <kiran_th@gmail.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Dipankar Sarma and committed by
Linus Torvalds
c0dfb290 8b6490e5

+308
+74
Documentation/RCU/rcuref.txt
Refcounter framework for elements of lists/arrays protected by RCU.

Refcounting on elements of lists which are protected by traditional
reader/writer spinlocks or semaphores is straightforward, as in:

1.  add()
    {
        alloc_object
        ...
        atomic_set(&el->rc, 1);
        write_lock(&list_lock);
        add_element
        ...
        write_unlock(&list_lock);
    }

2.  search_and_reference()
    {
        read_lock(&list_lock);
        search_for_element
        atomic_inc(&el->rc);
        ...
        read_unlock(&list_lock);
        ...
    }

3.  release_referenced()
    {
        ...
        if (atomic_dec_and_test(&el->rc))
            kfree(el);
        ...
    }

4.  delete()
    {
        write_lock(&list_lock);
        ...
        delete_element
        write_unlock(&list_lock);
        ...
        if (atomic_dec_and_test(&el->rc))
            kfree(el);
        ...
    }

If this list/array is made lock-free using RCU — changing the
write_lock() in add() and delete() to spin_lock() and changing the
read_lock() in search_and_reference() to rcu_read_lock() — then the
increment in search_and_reference() could potentially take a reference
to an element which has already been deleted from the list/array.
rcuref_inc_lf() takes care of this scenario: it fails (returns zero)
if the counter has already dropped to zero.  search_and_reference()
should then look as follows:

1.  add()
    {
        alloc_object
        ...
        atomic_set(&el->rc, 1);
        write_lock(&list_lock);
        add_element
        ...
        write_unlock(&list_lock);
    }

2.  search_and_reference()
    {
        rcu_read_lock();
        search_for_element
        if (!rcuref_inc_lf(&el->rc)) {  /* element already dying */
            rcu_read_unlock();
            return FAIL;
        }
        ...
        rcu_read_unlock();
    }

3.  release_referenced()
    {
        ...
        if (rcuref_dec_and_test(&el->rc))
            call_rcu(&el->head, el_free);
        ...
    }

4.  delete()
    {
        write_lock(&list_lock);
        ...
        delete_element
        write_unlock(&list_lock);
        ...
        if (rcuref_dec_and_test(&el->rc))
            call_rcu(&el->head, el_free);
        ...
    }

Sometimes a reference to the element needs to be obtained in the
update (write) stream.  In such cases, rcuref_inc_lf() might be
overkill since the spinlock serialising list updates is held.
rcuref_inc() is to be used in such cases.

For arches which do not have cmpxchg, the rcuref_inc_lf() API uses a
hashed-spinlock implementation, and the same hashed spinlock is
acquired in all rcuref_xxx primitives to preserve atomicity.

Note: Use rcuref_inc() only if you need to use rcuref_inc_lf() on the
refcounter at least at one place.  Mixing rcuref_inc() and atomic_xxx
APIs might lead to races.  rcuref_inc_lf() must be used in lock-free
RCU critical sections only.
+220
include/linux/rcuref.h
··· 1 + /* 2 + * rcuref.h 3 + * 4 + * Reference counting for elements of lists/arrays protected by 5 + * RCU. 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License as published by 9 + * the Free Software Foundation; either version 2 of the License, or 10 + * (at your option) any later version. 11 + * 12 + * This program is distributed in the hope that it will be useful, 13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 + * GNU General Public License for more details. 16 + * 17 + * You should have received a copy of the GNU General Public License 18 + * along with this program; if not, write to the Free Software 19 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 + * 21 + * Copyright (C) IBM Corporation, 2005 22 + * 23 + * Author: Dipankar Sarma <dipankar@in.ibm.com> 24 + * Ravikiran Thirumalai <kiran_th@gmail.com> 25 + * 26 + * See Documentation/RCU/rcuref.txt for detailed user guide. 27 + * 28 + */ 29 + 30 + #ifndef _RCUREF_H_ 31 + #define _RCUREF_H_ 32 + 33 + #ifdef __KERNEL__ 34 + 35 + #include <linux/types.h> 36 + #include <linux/interrupt.h> 37 + #include <linux/spinlock.h> 38 + #include <asm/atomic.h> 39 + 40 + /* 41 + * These APIs work on traditional atomic_t counters used in the 42 + * kernel for reference counting. Under special circumstances 43 + * where a lock-free get() operation races with a put() operation 44 + * these APIs can be used. See Documentation/RCU/rcuref.txt. 45 + */ 46 + 47 + #ifdef __HAVE_ARCH_CMPXCHG 48 + 49 + /** 50 + * rcuref_inc - increment refcount for object. 51 + * @rcuref: reference counter in the object in question. 52 + * 53 + * This should be used only for objects where we use RCU and 54 + * use the rcuref_inc_lf() api to acquire a reference 55 + * in a lock-free reader-side critical section. 
56 + */ 57 + static inline void rcuref_inc(atomic_t *rcuref) 58 + { 59 + atomic_inc(rcuref); 60 + } 61 + 62 + /** 63 + * rcuref_dec - decrement refcount for object. 64 + * @rcuref: reference counter in the object in question. 65 + * 66 + * This should be used only for objects where we use RCU and 67 + * use the rcuref_inc_lf() api to acquire a reference 68 + * in a lock-free reader-side critical section. 69 + */ 70 + static inline void rcuref_dec(atomic_t *rcuref) 71 + { 72 + atomic_dec(rcuref); 73 + } 74 + 75 + /** 76 + * rcuref_dec_and_test - decrement refcount for object and test 77 + * @rcuref: reference counter in the object. 78 + * @release: pointer to the function that will clean up the object 79 + * when the last reference to the object is released. 80 + * This pointer is required. 81 + * 82 + * Decrement the refcount, and if 0, return 1. Else return 0. 83 + * 84 + * This should be used only for objects where we use RCU and 85 + * use the rcuref_inc_lf() api to acquire a reference 86 + * in a lock-free reader-side critical section. 87 + */ 88 + static inline int rcuref_dec_and_test(atomic_t *rcuref) 89 + { 90 + return atomic_dec_and_test(rcuref); 91 + } 92 + 93 + /* 94 + * cmpxchg is needed on UP too, if deletions to the list/array can happen 95 + * in interrupt context. 96 + */ 97 + 98 + /** 99 + * rcuref_inc_lf - Take reference to an object in a read-side 100 + * critical section protected by RCU. 101 + * @rcuref: reference counter in the object in question. 102 + * 103 + * Try and increment the refcount by 1. The increment might fail if 104 + * the reference counter has been through a 1 to 0 transition and 105 + * is no longer part of the lock-free list. 106 + * Returns non-zero on successful increment and zero otherwise. 
107 + */ 108 + static inline int rcuref_inc_lf(atomic_t *rcuref) 109 + { 110 + int c, old; 111 + c = atomic_read(rcuref); 112 + while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c) 113 + c = old; 114 + return c; 115 + } 116 + 117 + #else /* !__HAVE_ARCH_CMPXCHG */ 118 + 119 + extern spinlock_t __rcuref_hash[]; 120 + 121 + /* 122 + * Use a hash table of locks to protect the reference count 123 + * since cmpxchg is not available in this arch. 124 + */ 125 + #ifdef CONFIG_SMP 126 + #define RCUREF_HASH_SIZE 4 127 + #define RCUREF_HASH(k) \ 128 + (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)]) 129 + #else 130 + #define RCUREF_HASH_SIZE 1 131 + #define RCUREF_HASH(k) &__rcuref_hash[0] 132 + #endif /* CONFIG_SMP */ 133 + 134 + /** 135 + * rcuref_inc - increment refcount for object. 136 + * @rcuref: reference counter in the object in question. 137 + * 138 + * This should be used only for objects where we use RCU and 139 + * use the rcuref_inc_lf() api to acquire a reference in a lock-free 140 + * reader-side critical section. 141 + */ 142 + static inline void rcuref_inc(atomic_t *rcuref) 143 + { 144 + unsigned long flags; 145 + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); 146 + rcuref->counter += 1; 147 + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); 148 + } 149 + 150 + /** 151 + * rcuref_dec - decrement refcount for object. 152 + * @rcuref: reference counter in the object in question. 153 + * 154 + * This should be used only for objects where we use RCU and 155 + * use the rcuref_inc_lf() api to acquire a reference in a lock-free 156 + * reader-side critical section. 
157 + */ 158 + static inline void rcuref_dec(atomic_t *rcuref) 159 + { 160 + unsigned long flags; 161 + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); 162 + rcuref->counter -= 1; 163 + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); 164 + } 165 + 166 + /** 167 + * rcuref_dec_and_test - decrement refcount for object and test 168 + * @rcuref: reference counter in the object. 169 + * @release: pointer to the function that will clean up the object 170 + * when the last reference to the object is released. 171 + * This pointer is required. 172 + * 173 + * Decrement the refcount, and if 0, return 1. Else return 0. 174 + * 175 + * This should be used only for objects where we use RCU and 176 + * use the rcuref_inc_lf() api to acquire a reference in a lock-free 177 + * reader-side critical section. 178 + */ 179 + static inline int rcuref_dec_and_test(atomic_t *rcuref) 180 + { 181 + unsigned long flags; 182 + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); 183 + rcuref->counter--; 184 + if (!rcuref->counter) { 185 + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); 186 + return 1; 187 + } else { 188 + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); 189 + return 0; 190 + } 191 + } 192 + 193 + /** 194 + * rcuref_inc_lf - Take reference to an object of a lock-free collection 195 + * by traversing a lock-free list/array. 196 + * @rcuref: reference counter in the object in question. 197 + * 198 + * Try and increment the refcount by 1. The increment might fail if 199 + * the reference counter has been through a 1 to 0 transition and 200 + * object is no longer part of the lock-free list. 201 + * Returns non-zero on successful increment and zero otherwise. 
202 + */ 203 + static inline int rcuref_inc_lf(atomic_t *rcuref) 204 + { 205 + int ret; 206 + unsigned long flags; 207 + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); 208 + if (rcuref->counter) 209 + ret = rcuref->counter++; 210 + else 211 + ret = 0; 212 + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); 213 + return ret; 214 + } 215 + 216 + 217 + #endif /* !__HAVE_ARCH_CMPXCHG */ 218 + 219 + #endif /* __KERNEL__ */ 220 + #endif /* _RCUREF_H_ */
+14
kernel/rcupdate.c
··· 45 45 #include <linux/percpu.h> 46 46 #include <linux/notifier.h> 47 47 #include <linux/rcupdate.h> 48 + #include <linux/rcuref.h> 48 49 #include <linux/cpu.h> 49 50 50 51 /* Definition for rcupdate control block. */ ··· 72 71 /* Fake initialization required by compiler */ 73 72 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 74 73 static int maxbatch = 10; 74 + 75 + #ifndef __HAVE_ARCH_CMPXCHG 76 + /* 77 + * We use an array of spinlocks for the rcurefs -- similar to ones in sparc 78 + * 32 bit atomic_t implementations, and a hash function similar to that 79 + * for our refcounting needs. 80 + * Can't help multiprocessors which donot have cmpxchg :( 81 + */ 82 + 83 + spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { 84 + [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED 85 + }; 86 + #endif 75 87 76 88 /** 77 89 * call_rcu - Queue an RCU callback for invocation after a grace period.