Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86-64: Use RIP-relative addressing for most per-CPU accesses

Observing that per-CPU data (in the SMP case) is reachable by
exploiting 64-bit address wraparound (building on the default kernel
load address being at 16MB), the one byte shorter RIP-relative
addressing form can be used for most per-CPU accesses. The one
exception is the "stable" reads, where the use of the "P" operand
modifier prevents the compiler from using RIP-relative addressing, but
is unavoidable due to the use of the "p" constraint (side note: with
gcc 4.9.x the intended effect of this isn't being achieved anymore,
see gcc bug 63637).

With the dependency on the minimum kernel load address, arbitrarily
low values for CONFIG_PHYSICAL_START are now no longer possible. A
link time assertion is being added, directing to the need to increase
that value when it triggers.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/5458A1780200007800044A9D@mail.emea.novell.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

authored by

Jan Beulich and committed by
Thomas Gleixner
97b67ae5 6d24c5f7

+46 -15
+44 -15
arch/x86/include/asm/percpu.h
··· 64 64 #define __percpu_prefix "" 65 65 #endif 66 66 67 - #define __percpu_arg(x) __percpu_prefix "%P" #x 67 + #define __percpu_arg(x) __percpu_prefix "%" #x 68 68 69 69 /* 70 70 * Initialized pointers to per-cpu variables needed for the boot ··· 179 179 } \ 180 180 } while (0) 181 181 182 - #define percpu_from_op(op, var, constraint) \ 182 + #define percpu_from_op(op, var) \ 183 183 ({ \ 184 184 typeof(var) pfo_ret__; \ 185 185 switch (sizeof(var)) { \ 186 186 case 1: \ 187 187 asm(op "b "__percpu_arg(1)",%0" \ 188 188 : "=q" (pfo_ret__) \ 189 - : constraint); \ 189 + : "m" (var)); \ 190 190 break; \ 191 191 case 2: \ 192 192 asm(op "w "__percpu_arg(1)",%0" \ 193 193 : "=r" (pfo_ret__) \ 194 - : constraint); \ 194 + : "m" (var)); \ 195 195 break; \ 196 196 case 4: \ 197 197 asm(op "l "__percpu_arg(1)",%0" \ 198 198 : "=r" (pfo_ret__) \ 199 - : constraint); \ 199 + : "m" (var)); \ 200 200 break; \ 201 201 case 8: \ 202 202 asm(op "q "__percpu_arg(1)",%0" \ 203 203 : "=r" (pfo_ret__) \ 204 - : constraint); \ 204 + : "m" (var)); \ 205 + break; \ 206 + default: __bad_percpu_size(); \ 207 + } \ 208 + pfo_ret__; \ 209 + }) 210 + 211 + #define percpu_stable_op(op, var) \ 212 + ({ \ 213 + typeof(var) pfo_ret__; \ 214 + switch (sizeof(var)) { \ 215 + case 1: \ 216 + asm(op "b "__percpu_arg(P1)",%0" \ 217 + : "=q" (pfo_ret__) \ 218 + : "p" (&(var))); \ 219 + break; \ 220 + case 2: \ 221 + asm(op "w "__percpu_arg(P1)",%0" \ 222 + : "=r" (pfo_ret__) \ 223 + : "p" (&(var))); \ 224 + break; \ 225 + case 4: \ 226 + asm(op "l "__percpu_arg(P1)",%0" \ 227 + : "=r" (pfo_ret__) \ 228 + : "p" (&(var))); \ 229 + break; \ 230 + case 8: \ 231 + asm(op "q "__percpu_arg(P1)",%0" \ 232 + : "=r" (pfo_ret__) \ 233 + : "p" (&(var))); \ 205 234 break; \ 206 235 default: __bad_percpu_size(); \ 207 236 } \ ··· 388 359 * per-thread variables implemented as per-cpu variables and thus 389 360 * stable for the duration of the respective task. 
390 361 */ 391 - #define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) 362 + #define this_cpu_read_stable(var) percpu_stable_op("mov", var) 392 363 393 - #define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 394 - #define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 395 - #define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 364 + #define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) 365 + #define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) 366 + #define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) 396 367 397 368 #define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 398 369 #define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) ··· 410 381 #define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) 411 382 #define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) 412 383 413 - #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 414 - #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 415 - #define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 384 + #define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) 385 + #define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) 386 + #define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) 416 387 #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 417 388 #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 418 389 #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) ··· 464 435 * 32 bit must fall back to generic operations. 
465 436 */ 466 437 #ifdef CONFIG_X86_64 467 - #define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 438 + #define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) 468 439 #define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 469 440 #define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 470 441 #define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) ··· 473 444 #define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 474 445 #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 475 446 476 - #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 447 + #define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) 477 448 #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 478 449 #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 479 450 #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+2
arch/x86/kernel/vmlinux.lds.S
··· 186 186 * start another segment - init. 187 187 */ 188 188 PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) 189 + ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START, 190 + "per-CPU data too large - increase CONFIG_PHYSICAL_START") 189 191 #endif 190 192 191 193 INIT_TEXT_SECTION(PAGE_SIZE)