Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'locking/rcuref' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pulling rcurefs from Peter for tglx's work.

Link: https://lore.kernel.org/all/20230328084534.GE4253@hirez.programming.kicks-ass.net/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+752 -19
+199 -9
include/linux/atomic/atomic-arch-fallback.h
··· 1208 1208 #define arch_atomic_inc_and_test arch_atomic_inc_and_test 1209 1209 #endif 1210 1210 1211 + #ifndef arch_atomic_add_negative_relaxed 1212 + #ifdef arch_atomic_add_negative 1213 + #define arch_atomic_add_negative_acquire arch_atomic_add_negative 1214 + #define arch_atomic_add_negative_release arch_atomic_add_negative 1215 + #define arch_atomic_add_negative_relaxed arch_atomic_add_negative 1216 + #endif /* arch_atomic_add_negative */ 1217 + 1211 1218 #ifndef arch_atomic_add_negative 1212 1219 /** 1213 - * arch_atomic_add_negative - add and test if negative 1220 + * arch_atomic_add_negative - Add and test if negative 1214 1221 * @i: integer value to add 1215 1222 * @v: pointer of type atomic_t 1216 1223 * 1217 - * Atomically adds @i to @v and returns true 1218 - * if the result is negative, or false when 1219 - * result is greater than or equal to zero. 1224 + * Atomically adds @i to @v and returns true if the result is negative, 1225 + * or false when the result is greater than or equal to zero. 1220 1226 */ 1221 1227 static __always_inline bool 1222 1228 arch_atomic_add_negative(int i, atomic_t *v) ··· 1231 1225 } 1232 1226 #define arch_atomic_add_negative arch_atomic_add_negative 1233 1227 #endif 1228 + 1229 + #ifndef arch_atomic_add_negative_acquire 1230 + /** 1231 + * arch_atomic_add_negative_acquire - Add and test if negative 1232 + * @i: integer value to add 1233 + * @v: pointer of type atomic_t 1234 + * 1235 + * Atomically adds @i to @v and returns true if the result is negative, 1236 + * or false when the result is greater than or equal to zero. 
1237 + */ 1238 + static __always_inline bool 1239 + arch_atomic_add_negative_acquire(int i, atomic_t *v) 1240 + { 1241 + return arch_atomic_add_return_acquire(i, v) < 0; 1242 + } 1243 + #define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire 1244 + #endif 1245 + 1246 + #ifndef arch_atomic_add_negative_release 1247 + /** 1248 + * arch_atomic_add_negative_release - Add and test if negative 1249 + * @i: integer value to add 1250 + * @v: pointer of type atomic_t 1251 + * 1252 + * Atomically adds @i to @v and returns true if the result is negative, 1253 + * or false when the result is greater than or equal to zero. 1254 + */ 1255 + static __always_inline bool 1256 + arch_atomic_add_negative_release(int i, atomic_t *v) 1257 + { 1258 + return arch_atomic_add_return_release(i, v) < 0; 1259 + } 1260 + #define arch_atomic_add_negative_release arch_atomic_add_negative_release 1261 + #endif 1262 + 1263 + #ifndef arch_atomic_add_negative_relaxed 1264 + /** 1265 + * arch_atomic_add_negative_relaxed - Add and test if negative 1266 + * @i: integer value to add 1267 + * @v: pointer of type atomic_t 1268 + * 1269 + * Atomically adds @i to @v and returns true if the result is negative, 1270 + * or false when the result is greater than or equal to zero. 
1271 + */ 1272 + static __always_inline bool 1273 + arch_atomic_add_negative_relaxed(int i, atomic_t *v) 1274 + { 1275 + return arch_atomic_add_return_relaxed(i, v) < 0; 1276 + } 1277 + #define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed 1278 + #endif 1279 + 1280 + #else /* arch_atomic_add_negative_relaxed */ 1281 + 1282 + #ifndef arch_atomic_add_negative_acquire 1283 + static __always_inline bool 1284 + arch_atomic_add_negative_acquire(int i, atomic_t *v) 1285 + { 1286 + bool ret = arch_atomic_add_negative_relaxed(i, v); 1287 + __atomic_acquire_fence(); 1288 + return ret; 1289 + } 1290 + #define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire 1291 + #endif 1292 + 1293 + #ifndef arch_atomic_add_negative_release 1294 + static __always_inline bool 1295 + arch_atomic_add_negative_release(int i, atomic_t *v) 1296 + { 1297 + __atomic_release_fence(); 1298 + return arch_atomic_add_negative_relaxed(i, v); 1299 + } 1300 + #define arch_atomic_add_negative_release arch_atomic_add_negative_release 1301 + #endif 1302 + 1303 + #ifndef arch_atomic_add_negative 1304 + static __always_inline bool 1305 + arch_atomic_add_negative(int i, atomic_t *v) 1306 + { 1307 + bool ret; 1308 + __atomic_pre_full_fence(); 1309 + ret = arch_atomic_add_negative_relaxed(i, v); 1310 + __atomic_post_full_fence(); 1311 + return ret; 1312 + } 1313 + #define arch_atomic_add_negative arch_atomic_add_negative 1314 + #endif 1315 + 1316 + #endif /* arch_atomic_add_negative_relaxed */ 1234 1317 1235 1318 #ifndef arch_atomic_fetch_add_unless 1236 1319 /** ··· 2424 2329 #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test 2425 2330 #endif 2426 2331 2332 + #ifndef arch_atomic64_add_negative_relaxed 2333 + #ifdef arch_atomic64_add_negative 2334 + #define arch_atomic64_add_negative_acquire arch_atomic64_add_negative 2335 + #define arch_atomic64_add_negative_release arch_atomic64_add_negative 2336 + #define arch_atomic64_add_negative_relaxed 
arch_atomic64_add_negative 2337 + #endif /* arch_atomic64_add_negative */ 2338 + 2427 2339 #ifndef arch_atomic64_add_negative 2428 2340 /** 2429 - * arch_atomic64_add_negative - add and test if negative 2341 + * arch_atomic64_add_negative - Add and test if negative 2430 2342 * @i: integer value to add 2431 2343 * @v: pointer of type atomic64_t 2432 2344 * 2433 - * Atomically adds @i to @v and returns true 2434 - * if the result is negative, or false when 2435 - * result is greater than or equal to zero. 2345 + * Atomically adds @i to @v and returns true if the result is negative, 2346 + * or false when the result is greater than or equal to zero. 2436 2347 */ 2437 2348 static __always_inline bool 2438 2349 arch_atomic64_add_negative(s64 i, atomic64_t *v) ··· 2447 2346 } 2448 2347 #define arch_atomic64_add_negative arch_atomic64_add_negative 2449 2348 #endif 2349 + 2350 + #ifndef arch_atomic64_add_negative_acquire 2351 + /** 2352 + * arch_atomic64_add_negative_acquire - Add and test if negative 2353 + * @i: integer value to add 2354 + * @v: pointer of type atomic64_t 2355 + * 2356 + * Atomically adds @i to @v and returns true if the result is negative, 2357 + * or false when the result is greater than or equal to zero. 2358 + */ 2359 + static __always_inline bool 2360 + arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) 2361 + { 2362 + return arch_atomic64_add_return_acquire(i, v) < 0; 2363 + } 2364 + #define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire 2365 + #endif 2366 + 2367 + #ifndef arch_atomic64_add_negative_release 2368 + /** 2369 + * arch_atomic64_add_negative_release - Add and test if negative 2370 + * @i: integer value to add 2371 + * @v: pointer of type atomic64_t 2372 + * 2373 + * Atomically adds @i to @v and returns true if the result is negative, 2374 + * or false when the result is greater than or equal to zero. 
2375 + */ 2376 + static __always_inline bool 2377 + arch_atomic64_add_negative_release(s64 i, atomic64_t *v) 2378 + { 2379 + return arch_atomic64_add_return_release(i, v) < 0; 2380 + } 2381 + #define arch_atomic64_add_negative_release arch_atomic64_add_negative_release 2382 + #endif 2383 + 2384 + #ifndef arch_atomic64_add_negative_relaxed 2385 + /** 2386 + * arch_atomic64_add_negative_relaxed - Add and test if negative 2387 + * @i: integer value to add 2388 + * @v: pointer of type atomic64_t 2389 + * 2390 + * Atomically adds @i to @v and returns true if the result is negative, 2391 + * or false when the result is greater than or equal to zero. 2392 + */ 2393 + static __always_inline bool 2394 + arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) 2395 + { 2396 + return arch_atomic64_add_return_relaxed(i, v) < 0; 2397 + } 2398 + #define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed 2399 + #endif 2400 + 2401 + #else /* arch_atomic64_add_negative_relaxed */ 2402 + 2403 + #ifndef arch_atomic64_add_negative_acquire 2404 + static __always_inline bool 2405 + arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) 2406 + { 2407 + bool ret = arch_atomic64_add_negative_relaxed(i, v); 2408 + __atomic_acquire_fence(); 2409 + return ret; 2410 + } 2411 + #define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire 2412 + #endif 2413 + 2414 + #ifndef arch_atomic64_add_negative_release 2415 + static __always_inline bool 2416 + arch_atomic64_add_negative_release(s64 i, atomic64_t *v) 2417 + { 2418 + __atomic_release_fence(); 2419 + return arch_atomic64_add_negative_relaxed(i, v); 2420 + } 2421 + #define arch_atomic64_add_negative_release arch_atomic64_add_negative_release 2422 + #endif 2423 + 2424 + #ifndef arch_atomic64_add_negative 2425 + static __always_inline bool 2426 + arch_atomic64_add_negative(s64 i, atomic64_t *v) 2427 + { 2428 + bool ret; 2429 + __atomic_pre_full_fence(); 2430 + ret = arch_atomic64_add_negative_relaxed(i, v); 
2431 + __atomic_post_full_fence(); 2432 + return ret; 2433 + } 2434 + #define arch_atomic64_add_negative arch_atomic64_add_negative 2435 + #endif 2436 + 2437 + #endif /* arch_atomic64_add_negative_relaxed */ 2450 2438 2451 2439 #ifndef arch_atomic64_fetch_add_unless 2452 2440 /** ··· 2646 2456 #endif 2647 2457 2648 2458 #endif /* _LINUX_ATOMIC_FALLBACK_H */ 2649 - // b5e87bdd5ede61470c29f7a7e4de781af3770f09 2459 + // 00071fffa021cec66f6290d706d69c91df87bade
+67 -1
include/linux/atomic/atomic-instrumented.h
··· 592 592 return arch_atomic_add_negative(i, v); 593 593 } 594 594 595 + static __always_inline bool 596 + atomic_add_negative_acquire(int i, atomic_t *v) 597 + { 598 + instrument_atomic_read_write(v, sizeof(*v)); 599 + return arch_atomic_add_negative_acquire(i, v); 600 + } 601 + 602 + static __always_inline bool 603 + atomic_add_negative_release(int i, atomic_t *v) 604 + { 605 + kcsan_release(); 606 + instrument_atomic_read_write(v, sizeof(*v)); 607 + return arch_atomic_add_negative_release(i, v); 608 + } 609 + 610 + static __always_inline bool 611 + atomic_add_negative_relaxed(int i, atomic_t *v) 612 + { 613 + instrument_atomic_read_write(v, sizeof(*v)); 614 + return arch_atomic_add_negative_relaxed(i, v); 615 + } 616 + 595 617 static __always_inline int 596 618 atomic_fetch_add_unless(atomic_t *v, int a, int u) 597 619 { ··· 1231 1209 kcsan_mb(); 1232 1210 instrument_atomic_read_write(v, sizeof(*v)); 1233 1211 return arch_atomic64_add_negative(i, v); 1212 + } 1213 + 1214 + static __always_inline bool 1215 + atomic64_add_negative_acquire(s64 i, atomic64_t *v) 1216 + { 1217 + instrument_atomic_read_write(v, sizeof(*v)); 1218 + return arch_atomic64_add_negative_acquire(i, v); 1219 + } 1220 + 1221 + static __always_inline bool 1222 + atomic64_add_negative_release(s64 i, atomic64_t *v) 1223 + { 1224 + kcsan_release(); 1225 + instrument_atomic_read_write(v, sizeof(*v)); 1226 + return arch_atomic64_add_negative_release(i, v); 1227 + } 1228 + 1229 + static __always_inline bool 1230 + atomic64_add_negative_relaxed(s64 i, atomic64_t *v) 1231 + { 1232 + instrument_atomic_read_write(v, sizeof(*v)); 1233 + return arch_atomic64_add_negative_relaxed(i, v); 1234 1234 } 1235 1235 1236 1236 static __always_inline s64 ··· 1874 1830 return arch_atomic_long_add_negative(i, v); 1875 1831 } 1876 1832 1833 + static __always_inline bool 1834 + atomic_long_add_negative_acquire(long i, atomic_long_t *v) 1835 + { 1836 + instrument_atomic_read_write(v, sizeof(*v)); 1837 + return 
arch_atomic_long_add_negative_acquire(i, v); 1838 + } 1839 + 1840 + static __always_inline bool 1841 + atomic_long_add_negative_release(long i, atomic_long_t *v) 1842 + { 1843 + kcsan_release(); 1844 + instrument_atomic_read_write(v, sizeof(*v)); 1845 + return arch_atomic_long_add_negative_release(i, v); 1846 + } 1847 + 1848 + static __always_inline bool 1849 + atomic_long_add_negative_relaxed(long i, atomic_long_t *v) 1850 + { 1851 + instrument_atomic_read_write(v, sizeof(*v)); 1852 + return arch_atomic_long_add_negative_relaxed(i, v); 1853 + } 1854 + 1877 1855 static __always_inline long 1878 1856 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) 1879 1857 { ··· 2149 2083 }) 2150 2084 2151 2085 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ 2152 - // 764f741eb77a7ad565dc8d99ce2837d5542e8aee 2086 + // 1b485de9cbaa4900de59e14ee2084357eaeb1c3a
+37 -1
include/linux/atomic/atomic-long.h
··· 479 479 return arch_atomic64_add_negative(i, v); 480 480 } 481 481 482 + static __always_inline bool 483 + arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) 484 + { 485 + return arch_atomic64_add_negative_acquire(i, v); 486 + } 487 + 488 + static __always_inline bool 489 + arch_atomic_long_add_negative_release(long i, atomic_long_t *v) 490 + { 491 + return arch_atomic64_add_negative_release(i, v); 492 + } 493 + 494 + static __always_inline bool 495 + arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) 496 + { 497 + return arch_atomic64_add_negative_relaxed(i, v); 498 + } 499 + 482 500 static __always_inline long 483 501 arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) 484 502 { ··· 991 973 return arch_atomic_add_negative(i, v); 992 974 } 993 975 976 + static __always_inline bool 977 + arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) 978 + { 979 + return arch_atomic_add_negative_acquire(i, v); 980 + } 981 + 982 + static __always_inline bool 983 + arch_atomic_long_add_negative_release(long i, atomic_long_t *v) 984 + { 985 + return arch_atomic_add_negative_release(i, v); 986 + } 987 + 988 + static __always_inline bool 989 + arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) 990 + { 991 + return arch_atomic_add_negative_relaxed(i, v); 992 + } 993 + 994 994 static __always_inline long 995 995 arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) 996 996 { ··· 1047 1011 1048 1012 #endif /* CONFIG_64BIT */ 1049 1013 #endif /* _LINUX_ATOMIC_LONG_H */ 1050 - // e8f0e08ff072b74d180eabe2ad001282b38c2c88 1014 + // a194c07d7d2f4b0e178d3c118c919775d5d65f50
+155
include/linux/rcuref.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef _LINUX_RCUREF_H 3 + #define _LINUX_RCUREF_H 4 + 5 + #include <linux/atomic.h> 6 + #include <linux/bug.h> 7 + #include <linux/limits.h> 8 + #include <linux/lockdep.h> 9 + #include <linux/preempt.h> 10 + #include <linux/rcupdate.h> 11 + 12 + #define RCUREF_ONEREF 0x00000000U 13 + #define RCUREF_MAXREF 0x7FFFFFFFU 14 + #define RCUREF_SATURATED 0xA0000000U 15 + #define RCUREF_RELEASED 0xC0000000U 16 + #define RCUREF_DEAD 0xE0000000U 17 + #define RCUREF_NOREF 0xFFFFFFFFU 18 + 19 + /** 20 + * rcuref_init - Initialize a rcuref reference count with the given reference count 21 + * @ref: Pointer to the reference count 22 + * @cnt: The initial reference count typically '1' 23 + */ 24 + static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) 25 + { 26 + atomic_set(&ref->refcnt, cnt - 1); 27 + } 28 + 29 + /** 30 + * rcuref_read - Read the number of held reference counts of a rcuref 31 + * @ref: Pointer to the reference count 32 + * 33 + * Return: The number of held references (0 ... N) 34 + */ 35 + static inline unsigned int rcuref_read(rcuref_t *ref) 36 + { 37 + unsigned int c = atomic_read(&ref->refcnt); 38 + 39 + /* Return 0 if within the DEAD zone. */ 40 + return c >= RCUREF_RELEASED ? 0 : c + 1; 41 + } 42 + 43 + extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); 44 + 45 + /** 46 + * rcuref_get - Acquire one reference on a rcuref reference count 47 + * @ref: Pointer to the reference count 48 + * 49 + * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF. 50 + * 51 + * Provides no memory ordering, it is assumed the caller has guaranteed the 52 + * object memory to be stable (RCU, etc.). It does provide a control dependency 53 + * and thereby orders future stores. See documentation in lib/rcuref.c 54 + * 55 + * Return: 56 + * False if the attempt to acquire a reference failed. 
This happens 57 + * when the last reference has been put already 58 + * 59 + * True if a reference was successfully acquired 60 + */ 61 + static inline __must_check bool rcuref_get(rcuref_t *ref) 62 + { 63 + /* 64 + * Unconditionally increase the reference count. The saturation and 65 + * dead zones provide enough tolerance for this. 66 + */ 67 + if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt))) 68 + return true; 69 + 70 + /* Handle the cases inside the saturation and dead zones */ 71 + return rcuref_get_slowpath(ref); 72 + } 73 + 74 + extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); 75 + 76 + /* 77 + * Internal helper. Do not invoke directly. 78 + */ 79 + static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) 80 + { 81 + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), 82 + "suspicious rcuref_put_rcusafe() usage"); 83 + /* 84 + * Unconditionally decrease the reference count. The saturation and 85 + * dead zones provide enough tolerance for this. 86 + */ 87 + if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) 88 + return false; 89 + 90 + /* 91 + * Handle the last reference drop and cases inside the saturation 92 + * and dead zones. 93 + */ 94 + return rcuref_put_slowpath(ref); 95 + } 96 + 97 + /** 98 + * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe 99 + * @ref: Pointer to the reference count 100 + * 101 + * Provides release memory ordering, such that prior loads and stores are done 102 + * before, and provides an acquire ordering on success such that free() 103 + * must come after. 104 + * 105 + * Can be invoked from contexts, which guarantee that no grace period can 106 + * happen which would free the object concurrently if the decrement drops 107 + * the last reference and the slowpath races against a concurrent get() and 108 + * put() pair. rcu_read_lock()'ed and atomic contexts qualify. 
109 + * 110 + * Return: 111 + * True if this was the last reference with no future references 112 + * possible. This signals the caller that it can safely release the 113 + * object which is protected by the reference counter. 114 + * 115 + * False if there are still active references or the put() raced 116 + * with a concurrent get()/put() pair. Caller is not allowed to 117 + * release the protected object. 118 + */ 119 + static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref) 120 + { 121 + return __rcuref_put(ref); 122 + } 123 + 124 + /** 125 + * rcuref_put -- Release one reference for a rcuref reference count 126 + * @ref: Pointer to the reference count 127 + * 128 + * Can be invoked from any context. 129 + * 130 + * Provides release memory ordering, such that prior loads and stores are done 131 + * before, and provides an acquire ordering on success such that free() 132 + * must come after. 133 + * 134 + * Return: 135 + * 136 + * True if this was the last reference with no future references 137 + * possible. This signals the caller that it can safely schedule the 138 + * object, which is protected by the reference counter, for 139 + * deconstruction. 140 + * 141 + * False if there are still active references or the put() raced 142 + * with a concurrent get()/put() pair. Caller is not allowed to 143 + * deconstruct the protected object. 144 + */ 145 + static inline __must_check bool rcuref_put(rcuref_t *ref) 146 + { 147 + bool released; 148 + 149 + preempt_disable(); 150 + released = __rcuref_put(ref); 151 + preempt_enable(); 152 + return released; 153 + } 154 + 155 + #endif
+6
include/linux/types.h
··· 175 175 } atomic64_t; 176 176 #endif 177 177 178 + typedef struct { 179 + atomic_t refcnt; 180 + } rcuref_t; 181 + 182 + #define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) } 183 + 178 184 struct list_head { 179 185 struct list_head *next, *prev; 180 186 };
+1 -1
lib/Makefile
··· 47 47 list_sort.o uuid.o iov_iter.o clz_ctz.o \ 48 48 bsearch.o find_bit.o llist.o memweight.o kfifo.o \ 49 49 percpu-refcount.o rhashtable.o base64.o \ 50 - once.o refcount.o usercopy.o errseq.o bucket_locks.o \ 50 + once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \ 51 51 generic-radix-tree.o 52 52 obj-$(CONFIG_STRING_SELFTEST) += test_string.o 53 53 obj-y += string_helpers.o
+281
lib/rcuref.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* 4 + * rcuref - A scalable reference count implementation for RCU managed objects 5 + * 6 + * rcuref is provided to replace open coded reference count implementations 7 + * based on atomic_t. It protects explicitly RCU managed objects which can 8 + * be visible even after the last reference has been dropped and the object 9 + * is heading towards destruction. 10 + * 11 + * A common usage pattern is: 12 + * 13 + * get() 14 + * rcu_read_lock(); 15 + * p = get_ptr(); 16 + * if (p && !atomic_inc_not_zero(&p->refcnt)) 17 + * p = NULL; 18 + * rcu_read_unlock(); 19 + * return p; 20 + * 21 + * put() 22 + * if (!atomic_dec_return(&p->refcnt)) { 23 + * remove_ptr(p); 24 + * kfree_rcu(p, rcu); 25 + * } 26 + * 27 + * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has 28 + * O(N^2) behaviour under contention with N concurrent operations. 29 + * 30 + * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales 31 + * better under contention. 32 + * 33 + * Why not refcount? 34 + * ================= 35 + * 36 + * In principle it should be possible to make refcount use the rcuref 37 + * scheme, but the destruction race described below cannot be prevented 38 + * unless the protected object is RCU managed. 39 + * 40 + * Theory of operation 41 + * =================== 42 + * 43 + * rcuref uses an unsigned integer reference counter. As long as the 44 + * counter value is greater than or equal to RCUREF_ONEREF and not larger 45 + * than RCUREF_MAXREF the reference is alive: 46 + * 47 + * ONEREF MAXREF SATURATED RELEASED DEAD NOREF 48 + * 0 0x7FFFFFFF 0x80000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF 49 + * <---valid --------> <-------saturation zone-------> <-----dead zone-----> 50 + * 51 + * The get() and put() operations do unconditional increments and 52 + * decrements. The result is checked after the operation. This optimizes 53 + * for the fast path. 
54 + * 55 + * If the reference count is saturated or dead, then the increments and 56 + * decrements are not harmful as the reference count still stays in the 57 + * respective zones and is always set back to SATURATED resp. DEAD. The 58 + * zones have room for 2^28 racing operations in each direction, which 59 + * makes it practically impossible to escape the zones. 60 + * 61 + * Once the last reference is dropped the reference count becomes 62 + * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The 63 + * slowpath then tries to set the reference count from RCUREF_NOREF to 64 + * RCUREF_DEAD via a cmpxchg(). This opens a small window where a 65 + * concurrent rcuref_get() can acquire the reference count and bring it 66 + * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD. 67 + * 68 + * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in 69 + * DEAD + 1, which is inside the dead zone. If that happens the reference 70 + * count is put back to DEAD. 71 + * 72 + * The actual race is possible due to the unconditional increment and 73 + * decrements in rcuref_get() and rcuref_put(): 74 + * 75 + * T1 T2 76 + * get() put() 77 + * if (atomic_add_negative(-1, &ref->refcnt)) 78 + * succeeds-> atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); 79 + * 80 + * atomic_add_negative(1, &ref->refcnt); <- Elevates refcount to DEAD + 1 81 + * 82 + * As the result of T1's add is negative, the get() goes into the slow path 83 + * and observes refcnt being in the dead zone which makes the operation fail. 
84 + * 85 + * Possible critical states: 86 + * 87 + * Context Counter References Operation 88 + * T1 0 1 init() 89 + * T2 1 2 get() 90 + * T1 0 1 put() 91 + * T2 -1 0 put() tries to mark dead 92 + * T1 0 1 get() 93 + * T2 0 1 put() mark dead fails 94 + * T1 -1 0 put() tries to mark dead 95 + * T1 DEAD 0 put() mark dead succeeds 96 + * T2 DEAD+1 0 get() fails and puts it back to DEAD 97 + * 98 + * Of course there are more complex scenarios, but the above illustrates 99 + * the working principle. The rest is left to the imagination of the 100 + * reader. 101 + * 102 + * Deconstruction race 103 + * =================== 104 + * 105 + * The release operation must be protected by prohibiting a grace period in 106 + * order to prevent a possible use after free: 107 + * 108 + * T1 T2 109 + * put() get() 110 + * // ref->refcnt = ONEREF 111 + * if (!atomic_add_negative(-1, &ref->refcnt)) 112 + * return false; <- Not taken 113 + * 114 + * // ref->refcnt == NOREF 115 + * --> preemption 116 + * // Elevates ref->refcnt to ONEREF 117 + * if (!atomic_add_negative(1, &ref->refcnt)) 118 + * return true; <- taken 119 + * 120 + * if (put(&p->ref)) { <-- Succeeds 121 + * remove_pointer(p); 122 + * kfree_rcu(p, rcu); 123 + * } 124 + * 125 + * RCU grace period ends, object is freed 126 + * 127 + * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF 128 + * 129 + * This is prevented by disabling preemption around the put() operation as 130 + * that's in most kernel configurations cheaper than a rcu_read_lock() / 131 + * rcu_read_unlock() pair and in many cases even a NOOP. In any case it 132 + * prevents the grace period which keeps the object alive until all put() 133 + * operations complete. 134 + * 135 + * Saturation protection 136 + * ===================== 137 + * 138 + * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX). 
139 + * Once this is exceeded the reference count becomes stale by setting it 140 + * to RCUREF_SATURATED, which will cause a memory leak, but it prevents 141 + * wrap arounds which obviously cause worse problems than a memory 142 + * leak. When saturation is reached a warning is emitted. 143 + * 144 + * Race conditions 145 + * =============== 146 + * 147 + * All reference count increment/decrement operations are unconditional and 148 + * only verified after the fact. This optimizes for the good case and takes 149 + * the occasional race vs. a dead or already saturated refcount into 150 + * account. The saturation and dead zones are large enough to accommodate 151 + * for that. 152 + * 153 + * Memory ordering 154 + * =============== 155 + * 156 + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions 157 + * and provide only what is strictly required for refcounts. 158 + * 159 + * The increments are fully relaxed; these will not provide ordering. The 160 + * rationale is that whatever is used to obtain the object to increase the 161 + * reference count on will provide the ordering. For locked data 162 + * structures, it's the lock acquire, for RCU/lockless data structures it's 163 + * the dependent load. 164 + * 165 + * rcuref_get() provides a control dependency ordering future stores which 166 + * ensures that the object is not modified when acquiring a reference 167 + * fails. 168 + * 169 + * rcuref_put() provides release order, i.e. all prior loads and stores 170 + * will be issued before. It also provides a control dependency ordering 171 + * against the subsequent destruction of the object. 172 + * 173 + * If rcuref_put() successfully dropped the last reference and marked the 174 + * object DEAD it also provides acquire ordering. 
175 + */ 176 + 177 + #include <linux/export.h> 178 + #include <linux/rcuref.h> 179 + 180 + /** 181 + * rcuref_get_slowpath - Slowpath of rcuref_get() 182 + * @ref: Pointer to the reference count 183 + * 184 + * Invoked when the reference count is outside of the valid zone. 185 + * 186 + * Return: 187 + * False if the reference count was already marked dead 188 + * 189 + * True if the reference count is saturated, which prevents the 190 + * object from being deconstructed ever. 191 + */ 192 + bool rcuref_get_slowpath(rcuref_t *ref) 193 + { 194 + unsigned int cnt = atomic_read(&ref->refcnt); 195 + 196 + /* 197 + * If the reference count was already marked dead, undo the 198 + * increment so it stays in the middle of the dead zone and return 199 + * fail. 200 + */ 201 + if (cnt >= RCUREF_RELEASED) { 202 + atomic_set(&ref->refcnt, RCUREF_DEAD); 203 + return false; 204 + } 205 + 206 + /* 207 + * If it was saturated, warn and mark it so. In case the increment 208 + * was already on a saturated value restore the saturation 209 + * marker. This keeps it in the middle of the saturation zone and 210 + * prevents the reference count from overflowing. This leaks the 211 + * object memory, but prevents the obvious reference count overflow 212 + * damage. 213 + */ 214 + if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) 215 + atomic_set(&ref->refcnt, RCUREF_SATURATED); 216 + return true; 217 + } 218 + EXPORT_SYMBOL_GPL(rcuref_get_slowpath); 219 + 220 + /** 221 + * rcuref_put_slowpath - Slowpath of __rcuref_put() 222 + * @ref: Pointer to the reference count 223 + * 224 + * Invoked when the reference count is outside of the valid zone. 225 + * 226 + * Return: 227 + * True if this was the last reference with no future references 228 + * possible. This signals the caller that it can safely schedule the 229 + * object, which is protected by the reference counter, for 230 + * deconstruction. 
231 + * 232 + * False if there are still active references or the put() raced 233 + * with a concurrent get()/put() pair. Caller is not allowed to 234 + * deconstruct the protected object. 235 + */ 236 + bool rcuref_put_slowpath(rcuref_t *ref) 237 + { 238 + unsigned int cnt = atomic_read(&ref->refcnt); 239 + 240 + /* Did this drop the last reference? */ 241 + if (likely(cnt == RCUREF_NOREF)) { 242 + /* 243 + * Carefully try to set the reference count to RCUREF_DEAD. 244 + * 245 + * This can fail if a concurrent get() operation has 246 + * elevated it again or the corresponding put() even marked 247 + * it dead already. Both are valid situations and do not 248 + * require a retry. If this fails the caller is not 249 + * allowed to deconstruct the object. 250 + */ 251 + if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF) 252 + return false; 253 + 254 + /* 255 + * The caller can safely schedule the object for 256 + * deconstruction. Provide acquire ordering. 257 + */ 258 + smp_acquire__after_ctrl_dep(); 259 + return true; 260 + } 261 + 262 + /* 263 + * If the reference count was already in the dead zone, then this 264 + * put() operation is imbalanced. Warn, put the reference count back to 265 + * DEAD and tell the caller to not deconstruct the object. 266 + */ 267 + if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { 268 + atomic_set(&ref->refcnt, RCUREF_DEAD); 269 + return false; 270 + } 271 + 272 + /* 273 + * This is a put() operation on a saturated refcount. Restore the 274 + * mean saturation value and tell the caller to not deconstruct the 275 + * object. 276 + */ 277 + if (cnt > RCUREF_MAXREF) 278 + atomic_set(&ref->refcnt, RCUREF_SATURATED); 279 + return false; 280 + } 281 + EXPORT_SYMBOL_GPL(rcuref_put_slowpath);
+1 -1
scripts/atomic/atomics.tbl
··· 33 33 sub_and_test b i v 34 34 dec_and_test b v 35 35 inc_and_test b v 36 - add_negative b i v 36 + add_negative B i v 37 37 add_unless fb v i:a i:u 38 38 inc_not_zero b v 39 39 inc_unless_negative b v
+5 -6
scripts/atomic/fallbacks/add_negative
··· 1 1 cat <<EOF 2 2 /** 3 - * arch_${atomic}_add_negative - add and test if negative 3 + * arch_${atomic}_add_negative${order} - Add and test if negative 4 4 * @i: integer value to add 5 5 * @v: pointer of type ${atomic}_t 6 6 * 7 - * Atomically adds @i to @v and returns true 8 - * if the result is negative, or false when 9 - * result is greater than or equal to zero. 7 + * Atomically adds @i to @v and returns true if the result is negative, 8 + * or false when the result is greater than or equal to zero. 10 9 */ 11 10 static __always_inline bool 12 - arch_${atomic}_add_negative(${int} i, ${atomic}_t *v) 11 + arch_${atomic}_add_negative${order}(${int} i, ${atomic}_t *v) 13 12 { 14 - return arch_${atomic}_add_return(i, v) < 0; 13 + return arch_${atomic}_add_return${order}(i, v) < 0; 15 14 } 16 15 EOF