Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}()

Nadav Amit reported that commit:

b59167ac7baf ("x86/percpu: Fix this_cpu_read()")

added a bunch of constraints to all sorts of code; and while some of
that was correct and desired, some of that seems superfluous.

The thing is, the this_cpu_*() operations are defined to be IRQ-safe;
this means the values are subject to change from IRQs and must
therefore be reloaded.
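
For illustration, a minimal sketch ('seen' is a hypothetical per-cpu
counter, not one touched by this patch):

  DEFINE_PER_CPU(unsigned int, seen);

  unsigned int a, b;

  a = this_cpu_read(seen);  /* must emit a load */
  /* an IRQ may fire here and do this_cpu_inc(seen) */
  b = this_cpu_read(seen);  /* must emit a second load;
                             * a == b cannot be assumed */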

Also, the generic form:

local_irq_save()
__this_cpu_read()
local_irq_restore()

would not allow the re-use of previous values; if by nothing else,
then because of the barrier()s implied by local_irq_*().
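
Spelled out with the same hypothetical 'seen' as above, that form is
roughly:

  unsigned long flags;
  unsigned int val;

  local_irq_save(flags);        /* implies barrier() */
  val = __this_cpu_read(seen);  /* cannot reuse a copy cached
                                 * before the barrier */
  local_irq_restore(flags);     /* implies barrier() */

so even the generic fallback ends up (re)loading from memory.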

This raises the point that percpu_from_op() and the other helpers
also need that volatile qualifier.
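
The compiler-level mechanism, as a stand-alone sketch (plain GNU C,
not kernel code): a non-volatile asm whose operands are identical is
a candidate for CSE, so two such reads may collapse into a single
load; the volatile qualifier forbids that:

  static int x;

  static inline int rd(void)
  {
        int ret;

        /* non-volatile: consecutive rd() calls may be CSE'd into one load */
        asm("movl %1, %0" : "=r" (ret) : "m" (x));
        return ret;
  }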

On the other hand, the __this_cpu_*() operations are not IRQ-safe:
they assume external preempt/IRQ disabling and can therefore be
allowed more room for optimization.
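
Under that contract something like the sketch below is fine, and the
compiler may fold the two non-volatile reads into a single load
(HYPOTHETICAL_LIMIT is a made-up constant):

  preempt_disable();
  if (__this_cpu_read(seen) &&
      __this_cpu_read(seen) < HYPOTHETICAL_LIMIT)
        __this_cpu_inc(seen);
  preempt_enable();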

This makes the this_cpu_*() vs __this_cpu_*() behaviour more
consistent with other architectures.

$ ./compare.sh defconfig-build defconfig-build1 vmlinux.o
x86_pmu_cancel_txn                        80       71      -9,+0
__text_poke                              919      964     +45,+0
do_user_addr_fault                      1082     1058     -24,+0
__do_page_fault                         1194     1178     -16,+0
do_exit                                 2995     3027     -43,+75
process_one_work                        1008      989     -67,+48
finish_task_switch                       524      505     -19,+0
__schedule_bug                           103       98     -59,+54
__schedule_bug                           103       98     -59,+54
__sched_setscheduler                    2015     2030     +15,+0
freeze_processes                         203      230     +31,-4
rcu_gp_kthread_wake                      106       99      -7,+0
rcu_core                                1841     1834      -7,+0
call_timer_fn                            298      286     -12,+0
can_stop_idle_tick                       146      139     -31,+24
perf_pending_event                       253      239     -14,+0
shmem_alloc_page                         209      213      +4,+0
__alloc_pages_slowpath                  3284     3269     -15,+0
umount_tree                              671      694     +23,+0
advance_transaction                      803      798      -5,+0
con_put_char                              71       51     -20,+0
xhci_urb_enqueue                        1302     1295      -7,+0
xhci_urb_enqueue                        1302     1295      -7,+0
tcp_sacktag_write_queue                 2130     2075     -55,+0
tcp_try_undo_loss                        229      208     -21,+0
tcp_v4_inbound_md5_hash                  438      411     -31,+4
tcp_v4_inbound_md5_hash                  438      411     -31,+4
tcp_v6_inbound_md5_hash                  469      411     -33,-25
tcp_v6_inbound_md5_hash                  469      411     -33,-25
restricted_pointer                       434      420     -14,+0
irq_exit                                 162      154      -8,+0
get_perf_callchain                       638      624     -14,+0
rt_mutex_trylock                         169      156     -13,+0
avc_has_extended_perms                  1092     1089      -3,+0
avc_has_perm_noaudit                     309      306      -3,+0
__perf_sw_event                          138      122     -16,+0
perf_swevent_get_recursion_context       116      102     -14,+0
__local_bh_enable_ip                      93       72     -21,+0
xfrm_input                              4175     4161     -14,+0
avc_has_perm                             446      443      -3,+0
vm_events_fold_cpu                        57       56      -1,+0
vfree                                     68       61      -7,+0
freeze_processes                         203      230     +31,-4
_local_bh_enable                          44       30     -14,+0
ip_do_fragment                          1982     1944     -38,+0
do_exit                                 2995     3027     -43,+75
__do_softirq                             742      724     -18,+0
cpu_init                                1510     1489     -21,+0
account_system_time                       80       79      -1,+0
total                               12985281 12984819  -742,+280

Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20181206112433.GB13675@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by Peter Zijlstra, committed by Ingo Molnar
0b9ccc0a a15ea1a3

+107 -107
arch/x86/include/asm/percpu.h
···
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
 
-#define percpu_to_op(op, var, val)			\
+#define percpu_to_op(qual, op, var, val)		\
 do {							\
 	typedef typeof(var) pto_T__;			\
 	if (0) {					\
···
 	}						\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b %1,"__percpu_arg(0)		\
+		asm qual (op "b %1,"__percpu_arg(0)	\
 		    : "+m" (var)			\
 		    : "qi" ((pto_T__)(val)));		\
 		break;					\
 	case 2:						\
-		asm(op "w %1,"__percpu_arg(0)		\
+		asm qual (op "w %1,"__percpu_arg(0)	\
 		    : "+m" (var)			\
 		    : "ri" ((pto_T__)(val)));		\
 		break;					\
 	case 4:						\
-		asm(op "l %1,"__percpu_arg(0)		\
+		asm qual (op "l %1,"__percpu_arg(0)	\
 		    : "+m" (var)			\
 		    : "ri" ((pto_T__)(val)));		\
 		break;					\
 	case 8:						\
-		asm(op "q %1,"__percpu_arg(0)		\
+		asm qual (op "q %1,"__percpu_arg(0)	\
 		    : "+m" (var)			\
 		    : "re" ((pto_T__)(val)));		\
 		break;					\
···
  * Generate a percpu add to memory instruction and optimize code
  * if one is added or subtracted.
  */
-#define percpu_add_op(var, val)						\
+#define percpu_add_op(qual, var, val)					\
 do {									\
 	typedef typeof(var) pao_T__;					\
 	const int pao_ID__ = (__builtin_constant_p(val) &&		\
···
 	switch (sizeof(var)) {						\
 	case 1:								\
 		if (pao_ID__ == 1)					\
-			asm("incb "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("incb "__percpu_arg(0) : "+m" (var));	\
 		else if (pao_ID__ == -1)				\
-			asm("decb "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("decb "__percpu_arg(0) : "+m" (var));	\
 		else							\
-			asm("addb %1, "__percpu_arg(0)			\
+			asm qual ("addb %1, "__percpu_arg(0)		\
 			    : "+m" (var)				\
 			    : "qi" ((pao_T__)(val)));			\
 		break;							\
 	case 2:								\
 		if (pao_ID__ == 1)					\
-			asm("incw "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("incw "__percpu_arg(0) : "+m" (var));	\
 		else if (pao_ID__ == -1)				\
-			asm("decw "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("decw "__percpu_arg(0) : "+m" (var));	\
 		else							\
-			asm("addw %1, "__percpu_arg(0)			\
+			asm qual ("addw %1, "__percpu_arg(0)		\
 			    : "+m" (var)				\
 			    : "ri" ((pao_T__)(val)));			\
 		break;							\
 	case 4:								\
 		if (pao_ID__ == 1)					\
-			asm("incl "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("incl "__percpu_arg(0) : "+m" (var));	\
 		else if (pao_ID__ == -1)				\
-			asm("decl "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("decl "__percpu_arg(0) : "+m" (var));	\
 		else							\
-			asm("addl %1, "__percpu_arg(0)			\
+			asm qual ("addl %1, "__percpu_arg(0)		\
 			    : "+m" (var)				\
 			    : "ri" ((pao_T__)(val)));			\
 		break;							\
 	case 8:								\
 		if (pao_ID__ == 1)					\
-			asm("incq "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("incq "__percpu_arg(0) : "+m" (var));	\
 		else if (pao_ID__ == -1)				\
-			asm("decq "__percpu_arg(0) : "+m" (var));	\
+			asm qual ("decq "__percpu_arg(0) : "+m" (var));	\
 		else							\
-			asm("addq %1, "__percpu_arg(0)			\
+			asm qual ("addq %1, "__percpu_arg(0)		\
 			    : "+m" (var)				\
 			    : "re" ((pao_T__)(val)));			\
 		break;							\
···
 	}								\
 } while (0)
 
-#define percpu_from_op(op, var)				\
+#define percpu_from_op(qual, op, var)			\
 ({							\
 	typeof(var) pfo_ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm volatile(op "b "__percpu_arg(1)",%0"\
+		asm qual (op "b "__percpu_arg(1)",%0"	\
 		    : "=q" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm volatile(op "w "__percpu_arg(1)",%0"\
+		asm qual (op "w "__percpu_arg(1)",%0"	\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm volatile(op "l "__percpu_arg(1)",%0"\
+		asm qual (op "l "__percpu_arg(1)",%0"	\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 8:						\
-		asm volatile(op "q "__percpu_arg(1)",%0"\
+		asm qual (op "q "__percpu_arg(1)",%0"	\
 		    : "=r" (pfo_ret__)			\
 		    : "m" (var));			\
 		break;					\
···
 	pfo_ret__;					\
 })
 
-#define percpu_unary_op(op, var)			\
+#define percpu_unary_op(qual, op, var)			\
 ({							\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_arg(0)		\
+		asm qual (op "b "__percpu_arg(0)	\
 		    : "+m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_arg(0)		\
+		asm qual (op "w "__percpu_arg(0)	\
 		    : "+m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_arg(0)		\
+		asm qual (op "l "__percpu_arg(0)	\
 		    : "+m" (var));			\
 		break;					\
 	case 8:						\
-		asm(op "q "__percpu_arg(0)		\
+		asm qual (op "q "__percpu_arg(0)	\
 		    : "+m" (var));			\
 		break;					\
 	default: __bad_percpu_size();			\
···
 /*
  * Add return operation
  */
-#define percpu_add_return_op(var, val)			\
+#define percpu_add_return_op(qual, var, val)		\
 ({							\
 	typeof(var) paro_ret__ = val;			\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm("xaddb %0, "__percpu_arg(1)		\
+		asm qual ("xaddb %0, "__percpu_arg(1)	\
 		    : "+q" (paro_ret__), "+m" (var)	\
 		    : : "memory");			\
 		break;					\
 	case 2:						\
-		asm("xaddw %0, "__percpu_arg(1)		\
+		asm qual ("xaddw %0, "__percpu_arg(1)	\
 		    : "+r" (paro_ret__), "+m" (var)	\
 		    : : "memory");			\
 		break;					\
 	case 4:						\
-		asm("xaddl %0, "__percpu_arg(1)		\
+		asm qual ("xaddl %0, "__percpu_arg(1)	\
 		    : "+r" (paro_ret__), "+m" (var)	\
 		    : : "memory");			\
 		break;					\
 	case 8:						\
-		asm("xaddq %0, "__percpu_arg(1)		\
+		asm qual ("xaddq %0, "__percpu_arg(1)	\
 		    : "+re" (paro_ret__), "+m" (var)	\
 		    : : "memory");			\
 		break;					\
···
  * expensive due to the implied lock prefix. The processor cannot prefetch
  * cachelines if xchg is used.
  */
-#define percpu_xchg_op(var, nval)				\
+#define percpu_xchg_op(qual, var, nval)				\
 ({								\
 	typeof(var) pxo_ret__;					\
 	typeof(var) pxo_new__ = (nval);				\
 	switch (sizeof(var)) {					\
 	case 1:							\
-		asm("\n\tmov "__percpu_arg(1)",%%al"		\
+		asm qual ("\n\tmov "__percpu_arg(1)",%%al"	\
 		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)	\
 		    "\n\tjnz 1b"				\
 		    : "=&a" (pxo_ret__), "+m" (var)		\
···
 		    : "memory");				\
 		break;						\
 	case 2:							\
-		asm("\n\tmov "__percpu_arg(1)",%%ax"		\
+		asm qual ("\n\tmov "__percpu_arg(1)",%%ax"	\
 		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)	\
 		    "\n\tjnz 1b"				\
 		    : "=&a" (pxo_ret__), "+m" (var)		\
···
 		    : "memory");				\
 		break;						\
 	case 4:							\
-		asm("\n\tmov "__percpu_arg(1)",%%eax"		\
+		asm qual ("\n\tmov "__percpu_arg(1)",%%eax"	\
 		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)	\
 		    "\n\tjnz 1b"				\
 		    : "=&a" (pxo_ret__), "+m" (var)		\
···
 		    : "memory");				\
 		break;						\
 	case 8:							\
-		asm("\n\tmov "__percpu_arg(1)",%%rax"		\
+		asm qual ("\n\tmov "__percpu_arg(1)",%%rax"	\
 		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)	\
 		    "\n\tjnz 1b"				\
 		    : "=&a" (pxo_ret__), "+m" (var)		\
···
  * cmpxchg has no such implied lock semantics as a result it is much
  * more efficient for cpu local operations.
  */
-#define percpu_cmpxchg_op(var, oval, nval)		\
+#define percpu_cmpxchg_op(qual, var, oval, nval)	\
 ({							\
 	typeof(var) pco_ret__;				\
 	typeof(var) pco_old__ = (oval);			\
 	typeof(var) pco_new__ = (nval);			\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm("cmpxchgb %2, "__percpu_arg(1)	\
+		asm qual ("cmpxchgb %2, "__percpu_arg(1)\
 		    : "=a" (pco_ret__), "+m" (var)	\
 		    : "q" (pco_new__), "0" (pco_old__)	\
 		    : "memory");			\
 		break;					\
 	case 2:						\
-		asm("cmpxchgw %2, "__percpu_arg(1)	\
+		asm qual ("cmpxchgw %2, "__percpu_arg(1)\
 		    : "=a" (pco_ret__), "+m" (var)	\
 		    : "r" (pco_new__), "0" (pco_old__)	\
 		    : "memory");			\
 		break;					\
 	case 4:						\
-		asm("cmpxchgl %2, "__percpu_arg(1)	\
+		asm qual ("cmpxchgl %2, "__percpu_arg(1)\
 		    : "=a" (pco_ret__), "+m" (var)	\
 		    : "r" (pco_new__), "0" (pco_old__)	\
 		    : "memory");			\
 		break;					\
 	case 8:						\
-		asm("cmpxchgq %2, "__percpu_arg(1)	\
+		asm qual ("cmpxchgq %2, "__percpu_arg(1)\
 		    : "=a" (pco_ret__), "+m" (var)	\
 		    : "r" (pco_new__), "0" (pco_old__)	\
 		    : "memory");			\
···
  */
 #define this_cpu_read_stable(var)	percpu_stable_op("mov", var)
 
-#define raw_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp)		percpu_from_op("mov", pcp)
+#define raw_cpu_read_1(pcp)		percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_2(pcp)		percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_4(pcp)		percpu_from_op(, "mov", pcp)
 
-#define raw_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val)		percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val)		percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val)		percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val)		percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val)		percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val)		percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val)		percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val)		percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val)		percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val)	percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val)
+#define raw_cpu_write_1(pcp, val)	percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val)	percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val)	percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val)		percpu_add_op(, (pcp), val)
+#define raw_cpu_add_2(pcp, val)		percpu_add_op(, (pcp), val)
+#define raw_cpu_add_4(pcp, val)		percpu_add_op(, (pcp), val)
+#define raw_cpu_and_1(pcp, val)		percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val)		percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val)		percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val)		percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val)		percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val)		percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val)	percpu_xchg_op(, pcp, val)
+#define raw_cpu_xchg_2(pcp, val)	percpu_xchg_op(, pcp, val)
+#define raw_cpu_xchg_4(pcp, val)	percpu_xchg_op(, pcp, val)
 
-#define this_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp)		percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val)	percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val)	percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val)	percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val)	percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val)	percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val)	percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val)		percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val)		percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val)		percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_read_1(pcp)		percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_2(pcp)		percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_4(pcp)		percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val)	percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val)	percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val)	percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
 
-#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(, pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
 
-#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
 
 #ifdef CONFIG_X86_CMPXCHG64
 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)	\
···
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp)			percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val)			percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val)			percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val)			percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp)			percpu_from_op(, "mov", pcp)
+#define raw_cpu_write_8(pcp, val)		percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val)			percpu_add_op(, (pcp), val)
+#define raw_cpu_and_8(pcp, val)			percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val)			percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(, pcp, val)
+#define raw_cpu_xchg_8(pcp, nval)		percpu_xchg_op(, pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
 
-#define this_cpu_read_8(pcp)			percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val)		percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val)			percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_read_8(pcp)			percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val)		percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val)		percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val)		percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val)			percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
 
 /*
  * Pretty complex macro to generate cmpxchg16 instruction. The instruction
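
A note on the mechanics, as a sketch of the expansion (not part of
the patch text): the new 'qual' macro parameter expands between
'asm' and the parenthesized template, so passing 'volatile' yields
an asm volatile statement, while the bare comma at the raw_cpu_*()
call sites passes an empty argument and yields a plain asm. For the
4-byte read case:

  /* this_cpu_read_4(pcp) -> percpu_from_op(volatile, "mov", pcp): */
  asm volatile("movl "__percpu_arg(1)",%0" : "=r" (pfo_ret__) : "m" (pcp));

  /* raw_cpu_read_4(pcp)  -> percpu_from_op(, "mov", pcp): */
  asm("movl "__percpu_arg(1)",%0" : "=r" (pfo_ret__) : "m" (pcp));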