Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

Pull percpu updates from Tejun Heo:

- Major reorganization of percpu header files which I think makes
things a lot more readable and logical than before.

- percpu-refcount is updated so that it requires explicit destruction
and can be reinitialized if necessary. This was pulled into the
block tree to replace the custom percpu reference counting implemented
in blk-mq.

- In the process, percpu and percpu-refcount got cleaned up a bit.

* 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (21 commits)
percpu-refcount: implement percpu_ref_reinit() and percpu_ref_is_zero()
percpu-refcount: require percpu_ref to be exited explicitly
percpu-refcount: use unsigned long for pcpu_count pointer
percpu-refcount: add helpers for ->percpu_count accesses
percpu-refcount: one bit is enough for REF_STATUS
percpu-refcount, aio: use percpu_ref_cancel_init() in ioctx_alloc()
workqueue: stronger test in process_one_work()
workqueue: clear POOL_DISASSOCIATED in rebind_workers()
percpu: Use ALIGN macro instead of hand coding alignment calculation
percpu: invoke __verify_pcpu_ptr() from the generic part of accessors and operations
percpu: preffity percpu header files
percpu: use raw_cpu_*() to define __this_cpu_*()
percpu: reorder macros in percpu header files
percpu: move {raw|this}_cpu_*() definitions to include/linux/percpu-defs.h
percpu: move generic {raw|this}_cpu_*_N() definitions to include/asm-generic/percpu.h
percpu: only allow sized arch overrides for {raw|this}_cpu_*() ops
percpu: reorganize include/linux/percpu-defs.h
percpu: move accessors from include/linux/percpu.h to percpu-defs.h
percpu: include/asm-generic/percpu.h should contain only arch-overridable parts
percpu: introduce arch_raw_cpu_ptr()
...

+836 -815
+1 -2
arch/x86/include/asm/percpu.h
··· 52 52 * Compared to the generic __my_cpu_offset version, the following 53 53 * saves one instruction and avoids clobbering a temp register. 54 54 */ 55 - #define raw_cpu_ptr(ptr) \ 55 + #define arch_raw_cpu_ptr(ptr) \ 56 56 ({ \ 57 57 unsigned long tcp_ptr__; \ 58 - __verify_pcpu_ptr(ptr); \ 59 58 asm volatile("add " __percpu_arg(1) ", %0" \ 60 59 : "=r" (tcp_ptr__) \ 61 60 : "m" (this_cpu_off), "0" (ptr)); \
+3 -1
drivers/target/target_core_tpg.c
··· 825 825 826 826 ret = core_dev_export(dev, tpg, lun); 827 827 if (ret < 0) { 828 - percpu_ref_cancel_init(&lun->lun_ref); 828 + percpu_ref_exit(&lun->lun_ref); 829 829 return ret; 830 830 } 831 831 ··· 879 879 spin_lock(&tpg->tpg_lun_lock); 880 880 lun->lun_status = TRANSPORT_LUN_STATUS_FREE; 881 881 spin_unlock(&tpg->tpg_lun_lock); 882 + 883 + percpu_ref_exit(&lun->lun_ref); 882 884 883 885 return 0; 884 886 }
+4 -2
fs/aio.c
··· 506 506 507 507 aio_free_ring(ctx); 508 508 free_percpu(ctx->cpu); 509 + percpu_ref_exit(&ctx->reqs); 510 + percpu_ref_exit(&ctx->users); 509 511 kmem_cache_free(kioctx_cachep, ctx); 510 512 } 511 513 ··· 717 715 err: 718 716 mutex_unlock(&ctx->ring_lock); 719 717 free_percpu(ctx->cpu); 720 - free_percpu(ctx->reqs.pcpu_count); 721 - free_percpu(ctx->users.pcpu_count); 718 + percpu_ref_exit(&ctx->reqs); 719 + percpu_ref_exit(&ctx->users); 722 720 kmem_cache_free(kioctx_cachep, ctx); 723 721 pr_debug("error allocating ioctx %d\n", err); 724 722 return ERR_PTR(err);
+355 -63
include/asm-generic/percpu.h
··· 36 36 #endif 37 37 38 38 /* 39 - * Add a offset to a pointer but keep the pointer as is. 40 - * 41 - * Only S390 provides its own means of moving the pointer. 39 + * Arch may define arch_raw_cpu_ptr() to provide more efficient address 40 + * translations for raw_cpu_ptr(). 42 41 */ 43 - #ifndef SHIFT_PERCPU_PTR 44 - /* Weird cast keeps both GCC and sparse happy. */ 45 - #define SHIFT_PERCPU_PTR(__p, __offset) ({ \ 46 - __verify_pcpu_ptr((__p)); \ 47 - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ 48 - }) 42 + #ifndef arch_raw_cpu_ptr 43 + #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) 49 44 #endif 50 - 51 - /* 52 - * A percpu variable may point to a discarded regions. The following are 53 - * established ways to produce a usable pointer from the percpu variable 54 - * offset. 55 - */ 56 - #define per_cpu(var, cpu) \ 57 - (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) 58 - 59 - #ifndef raw_cpu_ptr 60 - #define raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) 61 - #endif 62 - #ifdef CONFIG_DEBUG_PREEMPT 63 - #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) 64 - #else 65 - #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) 66 - #endif 67 - 68 - #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) 69 - #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) 70 45 71 46 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA 72 47 extern void setup_per_cpu_areas(void); 73 48 #endif 74 - 75 - #else /* ! 
SMP */ 76 - 77 - #define VERIFY_PERCPU_PTR(__p) ({ \ 78 - __verify_pcpu_ptr((__p)); \ 79 - (typeof(*(__p)) __kernel __force *)(__p); \ 80 - }) 81 - 82 - #define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) 83 - #define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) 84 - #define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) 85 - #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) 86 - #define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) 87 49 88 50 #endif /* SMP */ 89 51 ··· 57 95 #endif 58 96 #endif 59 97 60 - #ifdef CONFIG_SMP 61 - 62 - #ifdef MODULE 63 - #define PER_CPU_SHARED_ALIGNED_SECTION "" 64 - #define PER_CPU_ALIGNED_SECTION "" 65 - #else 66 - #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" 67 - #define PER_CPU_ALIGNED_SECTION "..shared_aligned" 68 - #endif 69 - #define PER_CPU_FIRST_SECTION "..first" 70 - 71 - #else 72 - 73 - #define PER_CPU_SHARED_ALIGNED_SECTION "" 74 - #define PER_CPU_ALIGNED_SECTION "..shared_aligned" 75 - #define PER_CPU_FIRST_SECTION "" 76 - 77 - #endif 78 - 79 98 #ifndef PER_CPU_ATTRIBUTES 80 99 #define PER_CPU_ATTRIBUTES 81 100 #endif ··· 65 122 #define PER_CPU_DEF_ATTRIBUTES 66 123 #endif 67 124 68 - /* Keep until we have removed all uses of __this_cpu_ptr */ 69 - #define __this_cpu_ptr raw_cpu_ptr 125 + #define raw_cpu_generic_to_op(pcp, val, op) \ 126 + do { \ 127 + *raw_cpu_ptr(&(pcp)) op val; \ 128 + } while (0) 129 + 130 + #define raw_cpu_generic_add_return(pcp, val) \ 131 + ({ \ 132 + raw_cpu_add(pcp, val); \ 133 + raw_cpu_read(pcp); \ 134 + }) 135 + 136 + #define raw_cpu_generic_xchg(pcp, nval) \ 137 + ({ \ 138 + typeof(pcp) __ret; \ 139 + __ret = raw_cpu_read(pcp); \ 140 + raw_cpu_write(pcp, nval); \ 141 + __ret; \ 142 + }) 143 + 144 + #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ 145 + ({ \ 146 + typeof(pcp) __ret; \ 147 + __ret = raw_cpu_read(pcp); \ 148 + if (__ret == (oval)) \ 149 + raw_cpu_write(pcp, nval); \ 150 + __ret; \ 151 + }) 152 + 153 + #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, 
oval1, oval2, nval1, nval2) \ 154 + ({ \ 155 + int __ret = 0; \ 156 + if (raw_cpu_read(pcp1) == (oval1) && \ 157 + raw_cpu_read(pcp2) == (oval2)) { \ 158 + raw_cpu_write(pcp1, nval1); \ 159 + raw_cpu_write(pcp2, nval2); \ 160 + __ret = 1; \ 161 + } \ 162 + (__ret); \ 163 + }) 164 + 165 + #define this_cpu_generic_read(pcp) \ 166 + ({ \ 167 + typeof(pcp) __ret; \ 168 + preempt_disable(); \ 169 + __ret = *this_cpu_ptr(&(pcp)); \ 170 + preempt_enable(); \ 171 + __ret; \ 172 + }) 173 + 174 + #define this_cpu_generic_to_op(pcp, val, op) \ 175 + do { \ 176 + unsigned long __flags; \ 177 + raw_local_irq_save(__flags); \ 178 + *raw_cpu_ptr(&(pcp)) op val; \ 179 + raw_local_irq_restore(__flags); \ 180 + } while (0) 181 + 182 + #define this_cpu_generic_add_return(pcp, val) \ 183 + ({ \ 184 + typeof(pcp) __ret; \ 185 + unsigned long __flags; \ 186 + raw_local_irq_save(__flags); \ 187 + raw_cpu_add(pcp, val); \ 188 + __ret = raw_cpu_read(pcp); \ 189 + raw_local_irq_restore(__flags); \ 190 + __ret; \ 191 + }) 192 + 193 + #define this_cpu_generic_xchg(pcp, nval) \ 194 + ({ \ 195 + typeof(pcp) __ret; \ 196 + unsigned long __flags; \ 197 + raw_local_irq_save(__flags); \ 198 + __ret = raw_cpu_read(pcp); \ 199 + raw_cpu_write(pcp, nval); \ 200 + raw_local_irq_restore(__flags); \ 201 + __ret; \ 202 + }) 203 + 204 + #define this_cpu_generic_cmpxchg(pcp, oval, nval) \ 205 + ({ \ 206 + typeof(pcp) __ret; \ 207 + unsigned long __flags; \ 208 + raw_local_irq_save(__flags); \ 209 + __ret = raw_cpu_read(pcp); \ 210 + if (__ret == (oval)) \ 211 + raw_cpu_write(pcp, nval); \ 212 + raw_local_irq_restore(__flags); \ 213 + __ret; \ 214 + }) 215 + 216 + #define this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 217 + ({ \ 218 + int __ret; \ 219 + unsigned long __flags; \ 220 + raw_local_irq_save(__flags); \ 221 + __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ 222 + oval1, oval2, nval1, nval2); \ 223 + raw_local_irq_restore(__flags); \ 224 + __ret; \ 225 + }) 226 + 227 
+ #ifndef raw_cpu_read_1 228 + #define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) 229 + #endif 230 + #ifndef raw_cpu_read_2 231 + #define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) 232 + #endif 233 + #ifndef raw_cpu_read_4 234 + #define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) 235 + #endif 236 + #ifndef raw_cpu_read_8 237 + #define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) 238 + #endif 239 + 240 + #ifndef raw_cpu_write_1 241 + #define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op(pcp, val, =) 242 + #endif 243 + #ifndef raw_cpu_write_2 244 + #define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op(pcp, val, =) 245 + #endif 246 + #ifndef raw_cpu_write_4 247 + #define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op(pcp, val, =) 248 + #endif 249 + #ifndef raw_cpu_write_8 250 + #define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op(pcp, val, =) 251 + #endif 252 + 253 + #ifndef raw_cpu_add_1 254 + #define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) 255 + #endif 256 + #ifndef raw_cpu_add_2 257 + #define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) 258 + #endif 259 + #ifndef raw_cpu_add_4 260 + #define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) 261 + #endif 262 + #ifndef raw_cpu_add_8 263 + #define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) 264 + #endif 265 + 266 + #ifndef raw_cpu_and_1 267 + #define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) 268 + #endif 269 + #ifndef raw_cpu_and_2 270 + #define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) 271 + #endif 272 + #ifndef raw_cpu_and_4 273 + #define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) 274 + #endif 275 + #ifndef raw_cpu_and_8 276 + #define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) 277 + #endif 278 + 279 + #ifndef raw_cpu_or_1 280 + #define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) 281 + #endif 282 + #ifndef raw_cpu_or_2 283 + #define raw_cpu_or_2(pcp, val) 
raw_cpu_generic_to_op(pcp, val, |=) 284 + #endif 285 + #ifndef raw_cpu_or_4 286 + #define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) 287 + #endif 288 + #ifndef raw_cpu_or_8 289 + #define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) 290 + #endif 291 + 292 + #ifndef raw_cpu_add_return_1 293 + #define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) 294 + #endif 295 + #ifndef raw_cpu_add_return_2 296 + #define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) 297 + #endif 298 + #ifndef raw_cpu_add_return_4 299 + #define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) 300 + #endif 301 + #ifndef raw_cpu_add_return_8 302 + #define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) 303 + #endif 304 + 305 + #ifndef raw_cpu_xchg_1 306 + #define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 307 + #endif 308 + #ifndef raw_cpu_xchg_2 309 + #define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 310 + #endif 311 + #ifndef raw_cpu_xchg_4 312 + #define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 313 + #endif 314 + #ifndef raw_cpu_xchg_8 315 + #define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 316 + #endif 317 + 318 + #ifndef raw_cpu_cmpxchg_1 319 + #define raw_cpu_cmpxchg_1(pcp, oval, nval) \ 320 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 321 + #endif 322 + #ifndef raw_cpu_cmpxchg_2 323 + #define raw_cpu_cmpxchg_2(pcp, oval, nval) \ 324 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 325 + #endif 326 + #ifndef raw_cpu_cmpxchg_4 327 + #define raw_cpu_cmpxchg_4(pcp, oval, nval) \ 328 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 329 + #endif 330 + #ifndef raw_cpu_cmpxchg_8 331 + #define raw_cpu_cmpxchg_8(pcp, oval, nval) \ 332 + raw_cpu_generic_cmpxchg(pcp, oval, nval) 333 + #endif 334 + 335 + #ifndef raw_cpu_cmpxchg_double_1 336 + #define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 337 + 
raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 338 + #endif 339 + #ifndef raw_cpu_cmpxchg_double_2 340 + #define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 341 + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 342 + #endif 343 + #ifndef raw_cpu_cmpxchg_double_4 344 + #define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 345 + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 346 + #endif 347 + #ifndef raw_cpu_cmpxchg_double_8 348 + #define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 349 + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 350 + #endif 351 + 352 + #ifndef this_cpu_read_1 353 + #define this_cpu_read_1(pcp) this_cpu_generic_read(pcp) 354 + #endif 355 + #ifndef this_cpu_read_2 356 + #define this_cpu_read_2(pcp) this_cpu_generic_read(pcp) 357 + #endif 358 + #ifndef this_cpu_read_4 359 + #define this_cpu_read_4(pcp) this_cpu_generic_read(pcp) 360 + #endif 361 + #ifndef this_cpu_read_8 362 + #define this_cpu_read_8(pcp) this_cpu_generic_read(pcp) 363 + #endif 364 + 365 + #ifndef this_cpu_write_1 366 + #define this_cpu_write_1(pcp, val) this_cpu_generic_to_op(pcp, val, =) 367 + #endif 368 + #ifndef this_cpu_write_2 369 + #define this_cpu_write_2(pcp, val) this_cpu_generic_to_op(pcp, val, =) 370 + #endif 371 + #ifndef this_cpu_write_4 372 + #define this_cpu_write_4(pcp, val) this_cpu_generic_to_op(pcp, val, =) 373 + #endif 374 + #ifndef this_cpu_write_8 375 + #define this_cpu_write_8(pcp, val) this_cpu_generic_to_op(pcp, val, =) 376 + #endif 377 + 378 + #ifndef this_cpu_add_1 379 + #define this_cpu_add_1(pcp, val) this_cpu_generic_to_op(pcp, val, +=) 380 + #endif 381 + #ifndef this_cpu_add_2 382 + #define this_cpu_add_2(pcp, val) this_cpu_generic_to_op(pcp, val, +=) 383 + #endif 384 + #ifndef this_cpu_add_4 385 + #define this_cpu_add_4(pcp, val) this_cpu_generic_to_op(pcp, val, +=) 386 + #endif 387 + 
#ifndef this_cpu_add_8 388 + #define this_cpu_add_8(pcp, val) this_cpu_generic_to_op(pcp, val, +=) 389 + #endif 390 + 391 + #ifndef this_cpu_and_1 392 + #define this_cpu_and_1(pcp, val) this_cpu_generic_to_op(pcp, val, &=) 393 + #endif 394 + #ifndef this_cpu_and_2 395 + #define this_cpu_and_2(pcp, val) this_cpu_generic_to_op(pcp, val, &=) 396 + #endif 397 + #ifndef this_cpu_and_4 398 + #define this_cpu_and_4(pcp, val) this_cpu_generic_to_op(pcp, val, &=) 399 + #endif 400 + #ifndef this_cpu_and_8 401 + #define this_cpu_and_8(pcp, val) this_cpu_generic_to_op(pcp, val, &=) 402 + #endif 403 + 404 + #ifndef this_cpu_or_1 405 + #define this_cpu_or_1(pcp, val) this_cpu_generic_to_op(pcp, val, |=) 406 + #endif 407 + #ifndef this_cpu_or_2 408 + #define this_cpu_or_2(pcp, val) this_cpu_generic_to_op(pcp, val, |=) 409 + #endif 410 + #ifndef this_cpu_or_4 411 + #define this_cpu_or_4(pcp, val) this_cpu_generic_to_op(pcp, val, |=) 412 + #endif 413 + #ifndef this_cpu_or_8 414 + #define this_cpu_or_8(pcp, val) this_cpu_generic_to_op(pcp, val, |=) 415 + #endif 416 + 417 + #ifndef this_cpu_add_return_1 418 + #define this_cpu_add_return_1(pcp, val) this_cpu_generic_add_return(pcp, val) 419 + #endif 420 + #ifndef this_cpu_add_return_2 421 + #define this_cpu_add_return_2(pcp, val) this_cpu_generic_add_return(pcp, val) 422 + #endif 423 + #ifndef this_cpu_add_return_4 424 + #define this_cpu_add_return_4(pcp, val) this_cpu_generic_add_return(pcp, val) 425 + #endif 426 + #ifndef this_cpu_add_return_8 427 + #define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val) 428 + #endif 429 + 430 + #ifndef this_cpu_xchg_1 431 + #define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval) 432 + #endif 433 + #ifndef this_cpu_xchg_2 434 + #define this_cpu_xchg_2(pcp, nval) this_cpu_generic_xchg(pcp, nval) 435 + #endif 436 + #ifndef this_cpu_xchg_4 437 + #define this_cpu_xchg_4(pcp, nval) this_cpu_generic_xchg(pcp, nval) 438 + #endif 439 + #ifndef this_cpu_xchg_8 440 + #define 
this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval) 441 + #endif 442 + 443 + #ifndef this_cpu_cmpxchg_1 444 + #define this_cpu_cmpxchg_1(pcp, oval, nval) \ 445 + this_cpu_generic_cmpxchg(pcp, oval, nval) 446 + #endif 447 + #ifndef this_cpu_cmpxchg_2 448 + #define this_cpu_cmpxchg_2(pcp, oval, nval) \ 449 + this_cpu_generic_cmpxchg(pcp, oval, nval) 450 + #endif 451 + #ifndef this_cpu_cmpxchg_4 452 + #define this_cpu_cmpxchg_4(pcp, oval, nval) \ 453 + this_cpu_generic_cmpxchg(pcp, oval, nval) 454 + #endif 455 + #ifndef this_cpu_cmpxchg_8 456 + #define this_cpu_cmpxchg_8(pcp, oval, nval) \ 457 + this_cpu_generic_cmpxchg(pcp, oval, nval) 458 + #endif 459 + 460 + #ifndef this_cpu_cmpxchg_double_1 461 + #define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 462 + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 463 + #endif 464 + #ifndef this_cpu_cmpxchg_double_2 465 + #define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 466 + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 467 + #endif 468 + #ifndef this_cpu_cmpxchg_double_4 469 + #define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 470 + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 471 + #endif 472 + #ifndef this_cpu_cmpxchg_double_8 473 + #define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 474 + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 475 + #endif 70 476 71 477 #endif /* _ASM_GENERIC_PERCPU_H_ */
+367 -13
include/linux/percpu-defs.h
··· 1 + /* 2 + * linux/percpu-defs.h - basic definitions for percpu areas 3 + * 4 + * DO NOT INCLUDE DIRECTLY OUTSIDE PERCPU IMPLEMENTATION PROPER. 5 + * 6 + * This file is separate from linux/percpu.h to avoid cyclic inclusion 7 + * dependency from arch header files. Only to be included from 8 + * asm/percpu.h. 9 + * 10 + * This file includes macros necessary to declare percpu sections and 11 + * variables, and definitions of percpu accessors and operations. It 12 + * should provide enough percpu features to arch header files even when 13 + * they can only include asm/percpu.h to avoid cyclic inclusion dependency. 14 + */ 15 + 1 16 #ifndef _LINUX_PERCPU_DEFS_H 2 17 #define _LINUX_PERCPU_DEFS_H 18 + 19 + #ifdef CONFIG_SMP 20 + 21 + #ifdef MODULE 22 + #define PER_CPU_SHARED_ALIGNED_SECTION "" 23 + #define PER_CPU_ALIGNED_SECTION "" 24 + #else 25 + #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" 26 + #define PER_CPU_ALIGNED_SECTION "..shared_aligned" 27 + #endif 28 + #define PER_CPU_FIRST_SECTION "..first" 29 + 30 + #else 31 + 32 + #define PER_CPU_SHARED_ALIGNED_SECTION "" 33 + #define PER_CPU_ALIGNED_SECTION "..shared_aligned" 34 + #define PER_CPU_FIRST_SECTION "" 35 + 36 + #endif 3 37 4 38 /* 5 39 * Base implementations of per-CPU variable declarations and definitions, where ··· 51 17 52 18 #define __PCPU_DUMMY_ATTRS \ 53 19 __attribute__((section(".discard"), unused)) 54 - 55 - /* 56 - * Macro which verifies @ptr is a percpu pointer without evaluating 57 - * @ptr. This is to be used in percpu accessors to verify that the 58 - * input parameter is a percpu pointer. 59 - * 60 - * + 0 is required in order to convert the pointer type from a 61 - * potential array type to a pointer to a single item of the array. 
62 - */ 63 - #define __verify_pcpu_ptr(ptr) do { \ 64 - const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ 65 - (void)__vpp_verify; \ 66 - } while (0) 67 20 68 21 /* 69 22 * s390 and alpha modules require percpu variables to be defined as ··· 185 164 #define EXPORT_PER_CPU_SYMBOL_GPL(var) 186 165 #endif 187 166 167 + /* 168 + * Accessors and operations. 169 + */ 170 + #ifndef __ASSEMBLY__ 171 + 172 + /* 173 + * __verify_pcpu_ptr() verifies @ptr is a percpu pointer without evaluating 174 + * @ptr and is invoked once before a percpu area is accessed by all 175 + * accessors and operations. This is performed in the generic part of 176 + * percpu and arch overrides don't need to worry about it; however, if an 177 + * arch wants to implement an arch-specific percpu accessor or operation, 178 + * it may use __verify_pcpu_ptr() to verify the parameters. 179 + * 180 + * + 0 is required in order to convert the pointer type from a 181 + * potential array type to a pointer to a single item of the array. 182 + */ 183 + #define __verify_pcpu_ptr(ptr) \ 184 + do { \ 185 + const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ 186 + (void)__vpp_verify; \ 187 + } while (0) 188 + 189 + #ifdef CONFIG_SMP 190 + 191 + /* 192 + * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() 193 + * to prevent the compiler from making incorrect assumptions about the 194 + * pointer value. The weird cast keeps both GCC and sparse happy. 
195 + */ 196 + #define SHIFT_PERCPU_PTR(__p, __offset) \ 197 + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) 198 + 199 + #define per_cpu_ptr(ptr, cpu) \ 200 + ({ \ 201 + __verify_pcpu_ptr(ptr); \ 202 + SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))); \ 203 + }) 204 + 205 + #define raw_cpu_ptr(ptr) \ 206 + ({ \ 207 + __verify_pcpu_ptr(ptr); \ 208 + arch_raw_cpu_ptr(ptr); \ 209 + }) 210 + 211 + #ifdef CONFIG_DEBUG_PREEMPT 212 + #define this_cpu_ptr(ptr) \ 213 + ({ \ 214 + __verify_pcpu_ptr(ptr); \ 215 + SHIFT_PERCPU_PTR(ptr, my_cpu_offset); \ 216 + }) 217 + #else 218 + #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) 219 + #endif 220 + 221 + #else /* CONFIG_SMP */ 222 + 223 + #define VERIFY_PERCPU_PTR(__p) \ 224 + ({ \ 225 + __verify_pcpu_ptr(__p); \ 226 + (typeof(*(__p)) __kernel __force *)(__p); \ 227 + }) 228 + 229 + #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); }) 230 + #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) 231 + #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) 232 + 233 + #endif /* CONFIG_SMP */ 234 + 235 + #define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu)) 236 + #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) 237 + #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) 238 + 239 + /* keep until we have removed all uses of __this_cpu_ptr */ 240 + #define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) 241 + 242 + /* 243 + * Must be an lvalue. Since @var must be a simple identifier, 244 + * we force a syntax error here if it isn't. 245 + */ 246 + #define get_cpu_var(var) \ 247 + (*({ \ 248 + preempt_disable(); \ 249 + this_cpu_ptr(&var); \ 250 + })) 251 + 252 + /* 253 + * The weird & is necessary because sparse considers (void)(var) to be 254 + * a direct dereference of percpu variable (var). 
255 + */ 256 + #define put_cpu_var(var) \ 257 + do { \ 258 + (void)&(var); \ 259 + preempt_enable(); \ 260 + } while (0) 261 + 262 + #define get_cpu_ptr(var) \ 263 + ({ \ 264 + preempt_disable(); \ 265 + this_cpu_ptr(var); \ 266 + }) 267 + 268 + #define put_cpu_ptr(var) \ 269 + do { \ 270 + (void)(var); \ 271 + preempt_enable(); \ 272 + } while (0) 273 + 274 + /* 275 + * Branching function to split up a function into a set of functions that 276 + * are called for different scalar sizes of the objects handled. 277 + */ 278 + 279 + extern void __bad_size_call_parameter(void); 280 + 281 + #ifdef CONFIG_DEBUG_PREEMPT 282 + extern void __this_cpu_preempt_check(const char *op); 283 + #else 284 + static inline void __this_cpu_preempt_check(const char *op) { } 285 + #endif 286 + 287 + #define __pcpu_size_call_return(stem, variable) \ 288 + ({ \ 289 + typeof(variable) pscr_ret__; \ 290 + __verify_pcpu_ptr(&(variable)); \ 291 + switch(sizeof(variable)) { \ 292 + case 1: pscr_ret__ = stem##1(variable); break; \ 293 + case 2: pscr_ret__ = stem##2(variable); break; \ 294 + case 4: pscr_ret__ = stem##4(variable); break; \ 295 + case 8: pscr_ret__ = stem##8(variable); break; \ 296 + default: \ 297 + __bad_size_call_parameter(); break; \ 298 + } \ 299 + pscr_ret__; \ 300 + }) 301 + 302 + #define __pcpu_size_call_return2(stem, variable, ...) \ 303 + ({ \ 304 + typeof(variable) pscr2_ret__; \ 305 + __verify_pcpu_ptr(&(variable)); \ 306 + switch(sizeof(variable)) { \ 307 + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ 308 + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ 309 + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ 310 + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ 311 + default: \ 312 + __bad_size_call_parameter(); break; \ 313 + } \ 314 + pscr2_ret__; \ 315 + }) 316 + 317 + /* 318 + * Special handling for cmpxchg_double. cmpxchg_double is passed two 319 + * percpu variables. 
The first has to be aligned to a double word 320 + * boundary and the second has to follow directly thereafter. 321 + * We enforce this on all architectures even if they don't support 322 + * a double cmpxchg instruction, since it's a cheap requirement, and it 323 + * avoids breaking the requirement for architectures with the instruction. 324 + */ 325 + #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ 326 + ({ \ 327 + bool pdcrb_ret__; \ 328 + __verify_pcpu_ptr(&(pcp1)); \ 329 + BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ 330 + VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \ 331 + VM_BUG_ON((unsigned long)(&(pcp2)) != \ 332 + (unsigned long)(&(pcp1)) + sizeof(pcp1)); \ 333 + switch(sizeof(pcp1)) { \ 334 + case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ 335 + case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ 336 + case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ 337 + case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ 338 + default: \ 339 + __bad_size_call_parameter(); break; \ 340 + } \ 341 + pdcrb_ret__; \ 342 + }) 343 + 344 + #define __pcpu_size_call(stem, variable, ...) \ 345 + do { \ 346 + __verify_pcpu_ptr(&(variable)); \ 347 + switch(sizeof(variable)) { \ 348 + case 1: stem##1(variable, __VA_ARGS__);break; \ 349 + case 2: stem##2(variable, __VA_ARGS__);break; \ 350 + case 4: stem##4(variable, __VA_ARGS__);break; \ 351 + case 8: stem##8(variable, __VA_ARGS__);break; \ 352 + default: \ 353 + __bad_size_call_parameter();break; \ 354 + } \ 355 + } while (0) 356 + 357 + /* 358 + * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com> 359 + * 360 + * Optimized manipulation for memory allocated through the per cpu 361 + * allocator or for addresses of per cpu variables. 362 + * 363 + * These operation guarantee exclusivity of access for other operations 364 + * on the *same* processor. 
The assumption is that per cpu data is only 365 + * accessed by a single processor instance (the current one). 366 + * 367 + * The arch code can provide optimized implementation by defining macros 368 + * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per 369 + * cpu atomic operations for 2 byte sized RMW actions. If arch code does 370 + * not provide operations for a scalar size then the fallback in the 371 + * generic code will be used. 372 + * 373 + * cmpxchg_double replaces two adjacent scalars at once. The first two 374 + * parameters are per cpu variables which have to be of the same size. A 375 + * truth value is returned to indicate success or failure (since a double 376 + * register result is difficult to handle). There is very limited hardware 377 + * support for these operations, so only certain sizes may work. 378 + */ 379 + 380 + /* 381 + * Operations for contexts where we do not want to do any checks for 382 + * preemptions. Unless strictly necessary, always use [__]this_cpu_*() 383 + * instead. 384 + * 385 + * If there is no other protection through preempt disable and/or disabling 386 + * interupts then one of these RMW operations can show unexpected behavior 387 + * because the execution thread was rescheduled on another processor or an 388 + * interrupt occurred and the same percpu variable was modified from the 389 + * interrupt context. 
390 + */ 391 + #define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp) 392 + #define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val) 393 + #define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val) 394 + #define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val) 395 + #define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val) 396 + #define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) 397 + #define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval) 398 + #define raw_cpu_cmpxchg(pcp, oval, nval) \ 399 + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) 400 + #define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 401 + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) 402 + 403 + #define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val)) 404 + #define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1) 405 + #define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1) 406 + #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) 407 + #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) 408 + #define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) 409 + 410 + /* 411 + * Operations for contexts that are safe from preemption/interrupts. These 412 + * operations verify that preemption is disabled. 
413 + */ 414 + #define __this_cpu_read(pcp) \ 415 + ({ \ 416 + __this_cpu_preempt_check("read"); \ 417 + raw_cpu_read(pcp); \ 418 + }) 419 + 420 + #define __this_cpu_write(pcp, val) \ 421 + ({ \ 422 + __this_cpu_preempt_check("write"); \ 423 + raw_cpu_write(pcp, val); \ 424 + }) 425 + 426 + #define __this_cpu_add(pcp, val) \ 427 + ({ \ 428 + __this_cpu_preempt_check("add"); \ 429 + raw_cpu_add(pcp, val); \ 430 + }) 431 + 432 + #define __this_cpu_and(pcp, val) \ 433 + ({ \ 434 + __this_cpu_preempt_check("and"); \ 435 + raw_cpu_and(pcp, val); \ 436 + }) 437 + 438 + #define __this_cpu_or(pcp, val) \ 439 + ({ \ 440 + __this_cpu_preempt_check("or"); \ 441 + raw_cpu_or(pcp, val); \ 442 + }) 443 + 444 + #define __this_cpu_add_return(pcp, val) \ 445 + ({ \ 446 + __this_cpu_preempt_check("add_return"); \ 447 + raw_cpu_add_return(pcp, val); \ 448 + }) 449 + 450 + #define __this_cpu_xchg(pcp, nval) \ 451 + ({ \ 452 + __this_cpu_preempt_check("xchg"); \ 453 + raw_cpu_xchg(pcp, nval); \ 454 + }) 455 + 456 + #define __this_cpu_cmpxchg(pcp, oval, nval) \ 457 + ({ \ 458 + __this_cpu_preempt_check("cmpxchg"); \ 459 + raw_cpu_cmpxchg(pcp, oval, nval); \ 460 + }) 461 + 462 + #define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 463 + ({ __this_cpu_preempt_check("cmpxchg_double"); \ 464 + raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \ 465 + }) 466 + 467 + #define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val)) 468 + #define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) 469 + #define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) 470 + #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) 471 + #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) 472 + #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) 473 + 474 + /* 475 + * Operations with implied preemption protection. These operations can be 476 + * used without worrying about preemption. 
Note that interrupts may still 477 + * occur while an operation is in progress and if the interrupt modifies 478 + * the variable too then RMW actions may not be reliable. 479 + */ 480 + #define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp) 481 + #define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val) 482 + #define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, pcp, val) 483 + #define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, pcp, val) 484 + #define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, pcp, val) 485 + #define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) 486 + #define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval) 487 + #define this_cpu_cmpxchg(pcp, oval, nval) \ 488 + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) 489 + #define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 490 + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) 491 + 492 + #define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val)) 493 + #define this_cpu_inc(pcp) this_cpu_add(pcp, 1) 494 + #define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) 495 + #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) 496 + #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) 497 + #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) 498 + 499 + #endif /* __ASSEMBLY__ */ 188 500 #endif /* _LINUX_PERCPU_DEFS_H */
+43 -21
include/linux/percpu-refcount.h
··· 57 57 atomic_t count; 58 58 /* 59 59 * The low bit of the pointer indicates whether the ref is in percpu 60 - * mode; if set, then get/put will manipulate the atomic_t (this is a 61 - * hack because we need to keep the pointer around for 62 - * percpu_ref_kill_rcu()) 60 + * mode; if set, then get/put will manipulate the atomic_t. 63 61 */ 64 - unsigned __percpu *pcpu_count; 62 + unsigned long pcpu_count_ptr; 65 63 percpu_ref_func_t *release; 66 64 percpu_ref_func_t *confirm_kill; 67 65 struct rcu_head rcu; ··· 67 69 68 70 int __must_check percpu_ref_init(struct percpu_ref *ref, 69 71 percpu_ref_func_t *release); 70 - void percpu_ref_cancel_init(struct percpu_ref *ref); 72 + void percpu_ref_reinit(struct percpu_ref *ref); 73 + void percpu_ref_exit(struct percpu_ref *ref); 71 74 void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 72 75 percpu_ref_func_t *confirm_kill); 73 76 ··· 87 88 return percpu_ref_kill_and_confirm(ref, NULL); 88 89 } 89 90 90 - #define PCPU_STATUS_BITS 2 91 - #define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) 92 - #define PCPU_REF_PTR 0 93 91 #define PCPU_REF_DEAD 1 94 92 95 - #define REF_STATUS(count) (((unsigned long) count) & PCPU_STATUS_MASK) 93 + /* 94 + * Internal helper. Don't use outside percpu-refcount proper. The 95 + * function doesn't return the pointer and let the caller test it for NULL 96 + * because doing so forces the compiler to generate two conditional 97 + * branches as it can't assume that @ref->pcpu_count is not NULL. 
98 + */ 99 + static inline bool __pcpu_ref_alive(struct percpu_ref *ref, 100 + unsigned __percpu **pcpu_countp) 101 + { 102 + unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); 103 + 104 + /* paired with smp_store_release() in percpu_ref_reinit() */ 105 + smp_read_barrier_depends(); 106 + 107 + if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) 108 + return false; 109 + 110 + *pcpu_countp = (unsigned __percpu *)pcpu_ptr; 111 + return true; 112 + } 96 113 97 114 /** 98 115 * percpu_ref_get - increment a percpu refcount ··· 122 107 123 108 rcu_read_lock_sched(); 124 109 125 - pcpu_count = ACCESS_ONCE(ref->pcpu_count); 126 - 127 - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) 110 + if (__pcpu_ref_alive(ref, &pcpu_count)) 128 111 this_cpu_inc(*pcpu_count); 129 112 else 130 113 atomic_inc(&ref->count); ··· 146 133 147 134 rcu_read_lock_sched(); 148 135 149 - pcpu_count = ACCESS_ONCE(ref->pcpu_count); 150 - 151 - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { 136 + if (__pcpu_ref_alive(ref, &pcpu_count)) { 152 137 this_cpu_inc(*pcpu_count); 153 138 ret = true; 154 139 } else { ··· 179 168 180 169 rcu_read_lock_sched(); 181 170 182 - pcpu_count = ACCESS_ONCE(ref->pcpu_count); 183 - 184 - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { 171 + if (__pcpu_ref_alive(ref, &pcpu_count)) { 185 172 this_cpu_inc(*pcpu_count); 186 173 ret = true; 187 174 } ··· 202 193 203 194 rcu_read_lock_sched(); 204 195 205 - pcpu_count = ACCESS_ONCE(ref->pcpu_count); 206 - 207 - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) 196 + if (__pcpu_ref_alive(ref, &pcpu_count)) 208 197 this_cpu_dec(*pcpu_count); 209 198 else if (unlikely(atomic_dec_and_test(&ref->count))) 210 199 ref->release(ref); 211 200 212 201 rcu_read_unlock_sched(); 202 + } 203 + 204 + /** 205 + * percpu_ref_is_zero - test whether a percpu refcount reached zero 206 + * @ref: percpu_ref to test 207 + * 208 + * Returns %true if @ref reached zero. 
209 + */ 210 + static inline bool percpu_ref_is_zero(struct percpu_ref *ref) 211 + { 212 + unsigned __percpu *pcpu_count; 213 + 214 + if (__pcpu_ref_alive(ref, &pcpu_count)) 215 + return false; 216 + return !atomic_read(&ref->count); 213 217 } 214 218 215 219 #endif
-673
include/linux/percpu.h
··· 23 23 PERCPU_MODULE_RESERVE) 24 24 #endif 25 25 26 - /* 27 - * Must be an lvalue. Since @var must be a simple identifier, 28 - * we force a syntax error here if it isn't. 29 - */ 30 - #define get_cpu_var(var) (*({ \ 31 - preempt_disable(); \ 32 - this_cpu_ptr(&var); })) 33 - 34 - /* 35 - * The weird & is necessary because sparse considers (void)(var) to be 36 - * a direct dereference of percpu variable (var). 37 - */ 38 - #define put_cpu_var(var) do { \ 39 - (void)&(var); \ 40 - preempt_enable(); \ 41 - } while (0) 42 - 43 - #define get_cpu_ptr(var) ({ \ 44 - preempt_disable(); \ 45 - this_cpu_ptr(var); }) 46 - 47 - #define put_cpu_ptr(var) do { \ 48 - (void)(var); \ 49 - preempt_enable(); \ 50 - } while (0) 51 - 52 26 /* minimum unit size, also is the maximum supported allocation size */ 53 27 #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) 54 28 ··· 114 140 pcpu_fc_populate_pte_fn_t populate_pte_fn); 115 141 #endif 116 142 117 - /* 118 - * Use this to get to a cpu's version of the per-cpu object 119 - * dynamically allocated. Non-atomic access to the current CPU's 120 - * version should probably be combined with get_cpu()/put_cpu(). 121 - */ 122 - #ifdef CONFIG_SMP 123 - #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) 124 - #else 125 - #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) 126 - #endif 127 - 128 143 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); 129 144 extern bool is_kernel_percpu_address(unsigned long addr); 130 145 ··· 128 165 129 166 #define alloc_percpu(type) \ 130 167 (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) 131 - 132 - /* 133 - * Branching function to split up a function into a set of functions that 134 - * are called for different scalar sizes of the objects handled. 
135 - */ 136 - 137 - extern void __bad_size_call_parameter(void); 138 - 139 - #ifdef CONFIG_DEBUG_PREEMPT 140 - extern void __this_cpu_preempt_check(const char *op); 141 - #else 142 - static inline void __this_cpu_preempt_check(const char *op) { } 143 - #endif 144 - 145 - #define __pcpu_size_call_return(stem, variable) \ 146 - ({ typeof(variable) pscr_ret__; \ 147 - __verify_pcpu_ptr(&(variable)); \ 148 - switch(sizeof(variable)) { \ 149 - case 1: pscr_ret__ = stem##1(variable);break; \ 150 - case 2: pscr_ret__ = stem##2(variable);break; \ 151 - case 4: pscr_ret__ = stem##4(variable);break; \ 152 - case 8: pscr_ret__ = stem##8(variable);break; \ 153 - default: \ 154 - __bad_size_call_parameter();break; \ 155 - } \ 156 - pscr_ret__; \ 157 - }) 158 - 159 - #define __pcpu_size_call_return2(stem, variable, ...) \ 160 - ({ \ 161 - typeof(variable) pscr2_ret__; \ 162 - __verify_pcpu_ptr(&(variable)); \ 163 - switch(sizeof(variable)) { \ 164 - case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ 165 - case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ 166 - case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ 167 - case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ 168 - default: \ 169 - __bad_size_call_parameter(); break; \ 170 - } \ 171 - pscr2_ret__; \ 172 - }) 173 - 174 - /* 175 - * Special handling for cmpxchg_double. cmpxchg_double is passed two 176 - * percpu variables. The first has to be aligned to a double word 177 - * boundary and the second has to follow directly thereafter. 178 - * We enforce this on all architectures even if they don't support 179 - * a double cmpxchg instruction, since it's a cheap requirement, and it 180 - * avoids breaking the requirement for architectures with the instruction. 181 - */ 182 - #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) 
\ 183 - ({ \ 184 - bool pdcrb_ret__; \ 185 - __verify_pcpu_ptr(&pcp1); \ 186 - BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ 187 - VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ 188 - VM_BUG_ON((unsigned long)(&pcp2) != \ 189 - (unsigned long)(&pcp1) + sizeof(pcp1)); \ 190 - switch(sizeof(pcp1)) { \ 191 - case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ 192 - case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ 193 - case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ 194 - case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ 195 - default: \ 196 - __bad_size_call_parameter(); break; \ 197 - } \ 198 - pdcrb_ret__; \ 199 - }) 200 - 201 - #define __pcpu_size_call(stem, variable, ...) \ 202 - do { \ 203 - __verify_pcpu_ptr(&(variable)); \ 204 - switch(sizeof(variable)) { \ 205 - case 1: stem##1(variable, __VA_ARGS__);break; \ 206 - case 2: stem##2(variable, __VA_ARGS__);break; \ 207 - case 4: stem##4(variable, __VA_ARGS__);break; \ 208 - case 8: stem##8(variable, __VA_ARGS__);break; \ 209 - default: \ 210 - __bad_size_call_parameter();break; \ 211 - } \ 212 - } while (0) 213 - 214 - /* 215 - * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com> 216 - * 217 - * Optimized manipulation for memory allocated through the per cpu 218 - * allocator or for addresses of per cpu variables. 219 - * 220 - * These operation guarantee exclusivity of access for other operations 221 - * on the *same* processor. The assumption is that per cpu data is only 222 - * accessed by a single processor instance (the current one). 223 - * 224 - * The first group is used for accesses that must be done in a 225 - * preemption safe way since we know that the context is not preempt 226 - * safe. Interrupts may occur. If the interrupt modifies the variable 227 - * too then RMW actions will not be reliable. 228 - * 229 - * The arch code can provide optimized functions in two ways: 230 - * 231 - * 1. 
Override the function completely. F.e. define this_cpu_add(). 232 - * The arch must then ensure that the various scalar format passed 233 - * are handled correctly. 234 - * 235 - * 2. Provide functions for certain scalar sizes. F.e. provide 236 - * this_cpu_add_2() to provide per cpu atomic operations for 2 byte 237 - * sized RMW actions. If arch code does not provide operations for 238 - * a scalar size then the fallback in the generic code will be 239 - * used. 240 - */ 241 - 242 - #define _this_cpu_generic_read(pcp) \ 243 - ({ typeof(pcp) ret__; \ 244 - preempt_disable(); \ 245 - ret__ = *this_cpu_ptr(&(pcp)); \ 246 - preempt_enable(); \ 247 - ret__; \ 248 - }) 249 - 250 - #ifndef this_cpu_read 251 - # ifndef this_cpu_read_1 252 - # define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) 253 - # endif 254 - # ifndef this_cpu_read_2 255 - # define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) 256 - # endif 257 - # ifndef this_cpu_read_4 258 - # define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) 259 - # endif 260 - # ifndef this_cpu_read_8 261 - # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) 262 - # endif 263 - # define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) 264 - #endif 265 - 266 - #define _this_cpu_generic_to_op(pcp, val, op) \ 267 - do { \ 268 - unsigned long flags; \ 269 - raw_local_irq_save(flags); \ 270 - *raw_cpu_ptr(&(pcp)) op val; \ 271 - raw_local_irq_restore(flags); \ 272 - } while (0) 273 - 274 - #ifndef this_cpu_write 275 - # ifndef this_cpu_write_1 276 - # define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) 277 - # endif 278 - # ifndef this_cpu_write_2 279 - # define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) 280 - # endif 281 - # ifndef this_cpu_write_4 282 - # define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) 283 - # endif 284 - # ifndef this_cpu_write_8 285 - # define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), 
(val), =) 286 - # endif 287 - # define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) 288 - #endif 289 - 290 - #ifndef this_cpu_add 291 - # ifndef this_cpu_add_1 292 - # define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) 293 - # endif 294 - # ifndef this_cpu_add_2 295 - # define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) 296 - # endif 297 - # ifndef this_cpu_add_4 298 - # define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) 299 - # endif 300 - # ifndef this_cpu_add_8 301 - # define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) 302 - # endif 303 - # define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) 304 - #endif 305 - 306 - #ifndef this_cpu_sub 307 - # define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) 308 - #endif 309 - 310 - #ifndef this_cpu_inc 311 - # define this_cpu_inc(pcp) this_cpu_add((pcp), 1) 312 - #endif 313 - 314 - #ifndef this_cpu_dec 315 - # define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) 316 - #endif 317 - 318 - #ifndef this_cpu_and 319 - # ifndef this_cpu_and_1 320 - # define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) 321 - # endif 322 - # ifndef this_cpu_and_2 323 - # define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) 324 - # endif 325 - # ifndef this_cpu_and_4 326 - # define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) 327 - # endif 328 - # ifndef this_cpu_and_8 329 - # define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) 330 - # endif 331 - # define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) 332 - #endif 333 - 334 - #ifndef this_cpu_or 335 - # ifndef this_cpu_or_1 336 - # define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) 337 - # endif 338 - # ifndef this_cpu_or_2 339 - # define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) 340 - # 
endif 341 - # ifndef this_cpu_or_4 342 - # define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) 343 - # endif 344 - # ifndef this_cpu_or_8 345 - # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) 346 - # endif 347 - # define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) 348 - #endif 349 - 350 - #define _this_cpu_generic_add_return(pcp, val) \ 351 - ({ \ 352 - typeof(pcp) ret__; \ 353 - unsigned long flags; \ 354 - raw_local_irq_save(flags); \ 355 - raw_cpu_add(pcp, val); \ 356 - ret__ = raw_cpu_read(pcp); \ 357 - raw_local_irq_restore(flags); \ 358 - ret__; \ 359 - }) 360 - 361 - #ifndef this_cpu_add_return 362 - # ifndef this_cpu_add_return_1 363 - # define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) 364 - # endif 365 - # ifndef this_cpu_add_return_2 366 - # define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) 367 - # endif 368 - # ifndef this_cpu_add_return_4 369 - # define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) 370 - # endif 371 - # ifndef this_cpu_add_return_8 372 - # define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) 373 - # endif 374 - # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) 375 - #endif 376 - 377 - #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) 378 - #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) 379 - #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) 380 - 381 - #define _this_cpu_generic_xchg(pcp, nval) \ 382 - ({ typeof(pcp) ret__; \ 383 - unsigned long flags; \ 384 - raw_local_irq_save(flags); \ 385 - ret__ = raw_cpu_read(pcp); \ 386 - raw_cpu_write(pcp, nval); \ 387 - raw_local_irq_restore(flags); \ 388 - ret__; \ 389 - }) 390 - 391 - #ifndef this_cpu_xchg 392 - # ifndef this_cpu_xchg_1 393 - # define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) 394 
- # endif 395 - # ifndef this_cpu_xchg_2 396 - # define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) 397 - # endif 398 - # ifndef this_cpu_xchg_4 399 - # define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) 400 - # endif 401 - # ifndef this_cpu_xchg_8 402 - # define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) 403 - # endif 404 - # define this_cpu_xchg(pcp, nval) \ 405 - __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) 406 - #endif 407 - 408 - #define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ 409 - ({ \ 410 - typeof(pcp) ret__; \ 411 - unsigned long flags; \ 412 - raw_local_irq_save(flags); \ 413 - ret__ = raw_cpu_read(pcp); \ 414 - if (ret__ == (oval)) \ 415 - raw_cpu_write(pcp, nval); \ 416 - raw_local_irq_restore(flags); \ 417 - ret__; \ 418 - }) 419 - 420 - #ifndef this_cpu_cmpxchg 421 - # ifndef this_cpu_cmpxchg_1 422 - # define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) 423 - # endif 424 - # ifndef this_cpu_cmpxchg_2 425 - # define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) 426 - # endif 427 - # ifndef this_cpu_cmpxchg_4 428 - # define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) 429 - # endif 430 - # ifndef this_cpu_cmpxchg_8 431 - # define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) 432 - # endif 433 - # define this_cpu_cmpxchg(pcp, oval, nval) \ 434 - __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) 435 - #endif 436 - 437 - /* 438 - * cmpxchg_double replaces two adjacent scalars at once. The first 439 - * two parameters are per cpu variables which have to be of the same 440 - * size. A truth value is returned to indicate success or failure 441 - * (since a double register result is difficult to handle). There is 442 - * very limited hardware support for these operations, so only certain 443 - * sizes may work. 
444 - */ 445 - #define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 446 - ({ \ 447 - int ret__; \ 448 - unsigned long flags; \ 449 - raw_local_irq_save(flags); \ 450 - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ 451 - oval1, oval2, nval1, nval2); \ 452 - raw_local_irq_restore(flags); \ 453 - ret__; \ 454 - }) 455 - 456 - #ifndef this_cpu_cmpxchg_double 457 - # ifndef this_cpu_cmpxchg_double_1 458 - # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 459 - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 460 - # endif 461 - # ifndef this_cpu_cmpxchg_double_2 462 - # define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 463 - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 464 - # endif 465 - # ifndef this_cpu_cmpxchg_double_4 466 - # define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 467 - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 468 - # endif 469 - # ifndef this_cpu_cmpxchg_double_8 470 - # define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 471 - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 472 - # endif 473 - # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 474 - __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) 475 - #endif 476 - 477 - /* 478 - * Generic percpu operations for contexts where we do not want to do 479 - * any checks for preemptiosn. 480 - * 481 - * If there is no other protection through preempt disable and/or 482 - * disabling interupts then one of these RMW operations can show unexpected 483 - * behavior because the execution thread was rescheduled on another processor 484 - * or an interrupt occurred and the same percpu variable was modified from 485 - * the interrupt context. 
486 - */ 487 - #ifndef raw_cpu_read 488 - # ifndef raw_cpu_read_1 489 - # define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) 490 - # endif 491 - # ifndef raw_cpu_read_2 492 - # define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) 493 - # endif 494 - # ifndef raw_cpu_read_4 495 - # define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) 496 - # endif 497 - # ifndef raw_cpu_read_8 498 - # define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) 499 - # endif 500 - # define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) 501 - #endif 502 - 503 - #define raw_cpu_generic_to_op(pcp, val, op) \ 504 - do { \ 505 - *raw_cpu_ptr(&(pcp)) op val; \ 506 - } while (0) 507 - 508 - 509 - #ifndef raw_cpu_write 510 - # ifndef raw_cpu_write_1 511 - # define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) 512 - # endif 513 - # ifndef raw_cpu_write_2 514 - # define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) 515 - # endif 516 - # ifndef raw_cpu_write_4 517 - # define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) 518 - # endif 519 - # ifndef raw_cpu_write_8 520 - # define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) 521 - # endif 522 - # define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) 523 - #endif 524 - 525 - #ifndef raw_cpu_add 526 - # ifndef raw_cpu_add_1 527 - # define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) 528 - # endif 529 - # ifndef raw_cpu_add_2 530 - # define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) 531 - # endif 532 - # ifndef raw_cpu_add_4 533 - # define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) 534 - # endif 535 - # ifndef raw_cpu_add_8 536 - # define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) 537 - # endif 538 - # define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) 539 - #endif 540 - 541 - #ifndef raw_cpu_sub 542 - # define raw_cpu_sub(pcp, val) 
raw_cpu_add((pcp), -(val)) 543 - #endif 544 - 545 - #ifndef raw_cpu_inc 546 - # define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) 547 - #endif 548 - 549 - #ifndef raw_cpu_dec 550 - # define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) 551 - #endif 552 - 553 - #ifndef raw_cpu_and 554 - # ifndef raw_cpu_and_1 555 - # define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) 556 - # endif 557 - # ifndef raw_cpu_and_2 558 - # define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) 559 - # endif 560 - # ifndef raw_cpu_and_4 561 - # define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) 562 - # endif 563 - # ifndef raw_cpu_and_8 564 - # define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) 565 - # endif 566 - # define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) 567 - #endif 568 - 569 - #ifndef raw_cpu_or 570 - # ifndef raw_cpu_or_1 571 - # define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) 572 - # endif 573 - # ifndef raw_cpu_or_2 574 - # define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) 575 - # endif 576 - # ifndef raw_cpu_or_4 577 - # define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) 578 - # endif 579 - # ifndef raw_cpu_or_8 580 - # define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) 581 - # endif 582 - # define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) 583 - #endif 584 - 585 - #define raw_cpu_generic_add_return(pcp, val) \ 586 - ({ \ 587 - raw_cpu_add(pcp, val); \ 588 - raw_cpu_read(pcp); \ 589 - }) 590 - 591 - #ifndef raw_cpu_add_return 592 - # ifndef raw_cpu_add_return_1 593 - # define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) 594 - # endif 595 - # ifndef raw_cpu_add_return_2 596 - # define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) 597 - # endif 598 - # ifndef raw_cpu_add_return_4 599 - # define raw_cpu_add_return_4(pcp, val) 
raw_cpu_generic_add_return(pcp, val) 600 - # endif 601 - # ifndef raw_cpu_add_return_8 602 - # define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) 603 - # endif 604 - # define raw_cpu_add_return(pcp, val) \ 605 - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) 606 - #endif 607 - 608 - #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) 609 - #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) 610 - #define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) 611 - 612 - #define raw_cpu_generic_xchg(pcp, nval) \ 613 - ({ typeof(pcp) ret__; \ 614 - ret__ = raw_cpu_read(pcp); \ 615 - raw_cpu_write(pcp, nval); \ 616 - ret__; \ 617 - }) 618 - 619 - #ifndef raw_cpu_xchg 620 - # ifndef raw_cpu_xchg_1 621 - # define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 622 - # endif 623 - # ifndef raw_cpu_xchg_2 624 - # define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 625 - # endif 626 - # ifndef raw_cpu_xchg_4 627 - # define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 628 - # endif 629 - # ifndef raw_cpu_xchg_8 630 - # define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) 631 - # endif 632 - # define raw_cpu_xchg(pcp, nval) \ 633 - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) 634 - #endif 635 - 636 - #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ 637 - ({ \ 638 - typeof(pcp) ret__; \ 639 - ret__ = raw_cpu_read(pcp); \ 640 - if (ret__ == (oval)) \ 641 - raw_cpu_write(pcp, nval); \ 642 - ret__; \ 643 - }) 644 - 645 - #ifndef raw_cpu_cmpxchg 646 - # ifndef raw_cpu_cmpxchg_1 647 - # define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) 648 - # endif 649 - # ifndef raw_cpu_cmpxchg_2 650 - # define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) 651 - # endif 652 - # ifndef raw_cpu_cmpxchg_4 653 - # define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) 654 - # endif 655 - 
# ifndef raw_cpu_cmpxchg_8 656 - # define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) 657 - # endif 658 - # define raw_cpu_cmpxchg(pcp, oval, nval) \ 659 - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) 660 - #endif 661 - 662 - #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 663 - ({ \ 664 - int __ret = 0; \ 665 - if (raw_cpu_read(pcp1) == (oval1) && \ 666 - raw_cpu_read(pcp2) == (oval2)) { \ 667 - raw_cpu_write(pcp1, (nval1)); \ 668 - raw_cpu_write(pcp2, (nval2)); \ 669 - __ret = 1; \ 670 - } \ 671 - (__ret); \ 672 - }) 673 - 674 - #ifndef raw_cpu_cmpxchg_double 675 - # ifndef raw_cpu_cmpxchg_double_1 676 - # define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 677 - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 678 - # endif 679 - # ifndef raw_cpu_cmpxchg_double_2 680 - # define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 681 - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 682 - # endif 683 - # ifndef raw_cpu_cmpxchg_double_4 684 - # define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 685 - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 686 - # endif 687 - # ifndef raw_cpu_cmpxchg_double_8 688 - # define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 689 - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) 690 - # endif 691 - # define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 692 - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) 693 - #endif 694 - 695 - /* 696 - * Generic percpu operations for context that are safe from preemption/interrupts. 
697 - */ 698 - #ifndef __this_cpu_read 699 - # define __this_cpu_read(pcp) \ 700 - (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) 701 - #endif 702 - 703 - #ifndef __this_cpu_write 704 - # define __this_cpu_write(pcp, val) \ 705 - do { __this_cpu_preempt_check("write"); \ 706 - __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ 707 - } while (0) 708 - #endif 709 - 710 - #ifndef __this_cpu_add 711 - # define __this_cpu_add(pcp, val) \ 712 - do { __this_cpu_preempt_check("add"); \ 713 - __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ 714 - } while (0) 715 - #endif 716 - 717 - #ifndef __this_cpu_sub 718 - # define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) 719 - #endif 720 - 721 - #ifndef __this_cpu_inc 722 - # define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) 723 - #endif 724 - 725 - #ifndef __this_cpu_dec 726 - # define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) 727 - #endif 728 - 729 - #ifndef __this_cpu_and 730 - # define __this_cpu_and(pcp, val) \ 731 - do { __this_cpu_preempt_check("and"); \ 732 - __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ 733 - } while (0) 734 - 735 - #endif 736 - 737 - #ifndef __this_cpu_or 738 - # define __this_cpu_or(pcp, val) \ 739 - do { __this_cpu_preempt_check("or"); \ 740 - __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ 741 - } while (0) 742 - #endif 743 - 744 - #ifndef __this_cpu_add_return 745 - # define __this_cpu_add_return(pcp, val) \ 746 - (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) 747 - #endif 748 - 749 - #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) 750 - #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) 751 - #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) 752 - 753 - #ifndef __this_cpu_xchg 754 - # define __this_cpu_xchg(pcp, nval) \ 755 - (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) 756 - 
#endif 757 - 758 - #ifndef __this_cpu_cmpxchg 759 - # define __this_cpu_cmpxchg(pcp, oval, nval) \ 760 - (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) 761 - #endif 762 - 763 - #ifndef __this_cpu_cmpxchg_double 764 - # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ 765 - (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) 766 - #endif 767 168 768 169 #endif /* __LINUX_PERCPU_H */
+5 -3
kernel/cgroup.c
··· 1638 1638 exit_root_id: 1639 1639 cgroup_exit_root_id(root); 1640 1640 cancel_ref: 1641 - percpu_ref_cancel_init(&root_cgrp->self.refcnt); 1641 + percpu_ref_exit(&root_cgrp->self.refcnt); 1642 1642 out: 1643 1643 free_cgrp_cset_links(&tmp_links); 1644 1644 return ret; ··· 4175 4175 container_of(work, struct cgroup_subsys_state, destroy_work); 4176 4176 struct cgroup *cgrp = css->cgroup; 4177 4177 4178 + percpu_ref_exit(&css->refcnt); 4179 + 4178 4180 if (css->ss) { 4179 4181 /* css free path */ 4180 4182 if (css->parent) ··· 4374 4372 err_free_id: 4375 4373 cgroup_idr_remove(&ss->css_idr, css->id); 4376 4374 err_free_percpu_ref: 4377 - percpu_ref_cancel_init(&css->refcnt); 4375 + percpu_ref_exit(&css->refcnt); 4378 4376 err_free_css: 4379 4377 call_rcu(&css->rcu_head, css_free_rcu_fn); 4380 4378 return err; ··· 4485 4483 out_free_id: 4486 4484 cgroup_idr_remove(&root->cgroup_idr, cgrp->id); 4487 4485 out_cancel_ref: 4488 - percpu_ref_cancel_init(&cgrp->self.refcnt); 4486 + percpu_ref_exit(&cgrp->self.refcnt); 4489 4487 out_free_cgrp: 4490 4488 kfree(cgrp); 4491 4489 out_unlock:
+3 -3
kernel/workqueue.c
··· 1962 1962 1963 1963 lockdep_copy_map(&lockdep_map, &work->lockdep_map); 1964 1964 #endif 1965 + /* ensure we're on the correct CPU */ 1965 1966 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && 1966 1967 raw_smp_processor_id() != pool->cpu); 1967 1968 ··· 4575 4574 for_each_pool(pool, pi) { 4576 4575 mutex_lock(&pool->attach_mutex); 4577 4576 4578 - if (pool->cpu == cpu) { 4577 + if (pool->cpu == cpu) 4579 4578 rebind_workers(pool); 4580 - } else if (pool->cpu < 0) { 4579 + else if (pool->cpu < 0) 4581 4580 restore_unbound_workers_cpumask(pool, cpu); 4582 - } 4583 4581 4584 4582 mutex_unlock(&pool->attach_mutex); 4585 4583 }
+54 -32
lib/percpu-refcount.c
··· 31 31 32 32 #define PCPU_COUNT_BIAS (1U << 31) 33 33 34 + static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) 35 + { 36 + return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); 37 + } 38 + 34 39 /** 35 40 * percpu_ref_init - initialize a percpu refcount 36 41 * @ref: percpu_ref to initialize ··· 51 46 { 52 47 atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); 53 48 54 - ref->pcpu_count = alloc_percpu(unsigned); 55 - if (!ref->pcpu_count) 49 + ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); 50 + if (!ref->pcpu_count_ptr) 56 51 return -ENOMEM; 57 52 58 53 ref->release = release; ··· 61 56 EXPORT_SYMBOL_GPL(percpu_ref_init); 62 57 63 58 /** 64 - * percpu_ref_cancel_init - cancel percpu_ref_init() 65 - * @ref: percpu_ref to cancel init for 59 + * percpu_ref_reinit - re-initialize a percpu refcount 60 + * @ref: perpcu_ref to re-initialize 66 61 * 67 - * Once a percpu_ref is initialized, its destruction is initiated by 68 - * percpu_ref_kill() and completes asynchronously, which can be painful to 69 - * do when destroying a half-constructed object in init failure path. 62 + * Re-initialize @ref so that it's in the same state as when it finished 63 + * percpu_ref_init(). @ref must have been initialized successfully, killed 64 + * and reached 0 but not exited. 70 65 * 71 - * This function destroys @ref without invoking @ref->release and the 72 - * memory area containing it can be freed immediately on return. To 73 - * prevent accidental misuse, it's required that @ref has finished 74 - * percpu_ref_init(), whether successful or not, but never used. 75 - * 76 - * The weird name and usage restriction are to prevent people from using 77 - * this function by mistake for normal shutdown instead of 78 - * percpu_ref_kill(). 66 + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while 67 + * this function is in progress. 
79 68 */ 80 - void percpu_ref_cancel_init(struct percpu_ref *ref) 69 + void percpu_ref_reinit(struct percpu_ref *ref) 81 70 { 82 - unsigned __percpu *pcpu_count = ref->pcpu_count; 71 + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); 83 72 int cpu; 84 73 85 - WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); 74 + BUG_ON(!pcpu_count); 75 + WARN_ON(!percpu_ref_is_zero(ref)); 76 + 77 + atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); 78 + 79 + /* 80 + * Restore per-cpu operation. smp_store_release() is paired with 81 + * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees 82 + * that the zeroing is visible to all percpu accesses which can see 83 + * the following PCPU_REF_DEAD clearing. 84 + */ 85 + for_each_possible_cpu(cpu) 86 + *per_cpu_ptr(pcpu_count, cpu) = 0; 87 + 88 + smp_store_release(&ref->pcpu_count_ptr, 89 + ref->pcpu_count_ptr & ~PCPU_REF_DEAD); 90 + } 91 + EXPORT_SYMBOL_GPL(percpu_ref_reinit); 92 + 93 + /** 94 + * percpu_ref_exit - undo percpu_ref_init() 95 + * @ref: percpu_ref to exit 96 + * 97 + * This function exits @ref. The caller is responsible for ensuring that 98 + * @ref is no longer in active use. The usual places to invoke this 99 + * function from are the @ref->release() callback or in init failure path 100 + * where percpu_ref_init() succeeded but other parts of the initialization 101 + * of the embedding object failed. 
102 + */ 103 + void percpu_ref_exit(struct percpu_ref *ref) 104 + { 105 + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); 86 106 87 107 if (pcpu_count) { 88 - for_each_possible_cpu(cpu) 89 - WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); 90 - free_percpu(ref->pcpu_count); 108 + free_percpu(pcpu_count); 109 + ref->pcpu_count_ptr = PCPU_REF_DEAD; 91 110 } 92 111 } 93 - EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); 112 + EXPORT_SYMBOL_GPL(percpu_ref_exit); 94 113 95 114 static void percpu_ref_kill_rcu(struct rcu_head *rcu) 96 115 { 97 116 struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); 98 - unsigned __percpu *pcpu_count = ref->pcpu_count; 117 + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); 99 118 unsigned count = 0; 100 119 int cpu; 101 120 102 - /* Mask out PCPU_REF_DEAD */ 103 - pcpu_count = (unsigned __percpu *) 104 - (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); 105 - 106 121 for_each_possible_cpu(cpu) 107 122 count += *per_cpu_ptr(pcpu_count, cpu); 108 - 109 - free_percpu(pcpu_count); 110 123 111 124 pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); 112 125 ··· 175 152 void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 176 153 percpu_ref_func_t *confirm_kill) 177 154 { 178 - WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, 155 + WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, 179 156 "percpu_ref_kill() called more than once!\n"); 180 157 181 - ref->pcpu_count = (unsigned __percpu *) 182 - (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); 158 + ref->pcpu_count_ptr |= PCPU_REF_DEAD; 183 159 ref->confirm_kill = confirm_kill; 184 160 185 161 call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+1 -2
mm/percpu.c
··· 720 720 if (unlikely(align < 2)) 721 721 align = 2; 722 722 723 - if (unlikely(size & 1)) 724 - size++; 723 + size = ALIGN(size, 2); 725 724 726 725 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { 727 726 WARN(true, "illegal size (%zu) or align (%zu) for "