Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: 7587/1: implement optimized percpu variable access

Use the previously unused TPIDRPRW register to store percpu offsets.
TPIDRPRW is only accessible in PL1, so it can only be used in the kernel.

This replaces 2 loads with an mrc instruction for each percpu variable
access. With hackbench, the performance improvement is 1.4% on Cortex-A9
(highbank). Taking an average of 30 runs of "hackbench -l 1000" yields:

Before: 6.2191
After: 6.1348

Will Deacon reported similar delta on v6 with 11MPCore.

The asm "memory" clobbers are needed here to ensure the percpu offset
gets reloaded. Testing by Will found that this would not happen in
__schedule() which is a bit of a special case as preemption is disabled
but the execution can move cores.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

authored by

Rob Herring and committed by
Russell King
14318efb 3e99675a

+54 -2
-1
arch/arm/include/asm/Kbuild
··· 16 16 generic-y += msgbuf.h 17 17 generic-y += param.h 18 18 generic-y += parport.h 19 - generic-y += percpu.h 20 19 generic-y += poll.h 21 20 generic-y += resource.h 22 21 generic-y += sections.h
+45
arch/arm/include/asm/percpu.h
··· 1 + /* 2 + * Copyright 2012 Calxeda, Inc. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + * 13 + * You should have received a copy of the GNU General Public License along with 14 + * this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + #ifndef _ASM_ARM_PERCPU_H_ 17 + #define _ASM_ARM_PERCPU_H_ 18 + 19 + /* 20 + * Same as asm-generic/percpu.h, except that we store the per cpu offset 21 + * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 22 + */ 23 + #if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) 24 + static inline void set_my_cpu_offset(unsigned long off) 25 + { 26 + /* Set TPIDRPRW */ 27 + asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory"); 28 + } 29 + 30 + static inline unsigned long __my_cpu_offset(void) 31 + { 32 + unsigned long off; 33 + /* Read TPIDRPRW */ 34 + asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory"); 35 + return off; 36 + } 37 + #define __my_cpu_offset __my_cpu_offset() 38 + #else 39 + #define set_my_cpu_offset(x) do {} while(0) 40 + 41 + #endif /* CONFIG_SMP */ 42 + 43 + #include <asm-generic/percpu.h> 44 + 45 + #endif /* _ASM_ARM_PERCPU_H_ */
+6
arch/arm/kernel/setup.c
··· 383 383 BUG(); 384 384 } 385 385 386 + /* 387 + * This only works on resume and secondary cores. For booting on the 388 + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup. 389 + */ 390 + set_my_cpu_offset(per_cpu_offset(cpu)); 391 + 386 392 cpu_proc_init(); 387 393 388 394 /*
+3 -1
arch/arm/kernel/smp.c
··· 314 314 current->active_mm = mm; 315 315 cpumask_set_cpu(cpu, mm_cpumask(mm)); 316 316 317 + cpu_init(); 318 + 317 319 printk("CPU%u: Booted secondary processor\n", cpu); 318 320 319 - cpu_init(); 320 321 preempt_disable(); 321 322 trace_hardirqs_off(); 322 323 ··· 373 372 374 373 void __init smp_prepare_boot_cpu(void) 375 374 { 375 + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); 376 376 } 377 377 378 378 void __init smp_prepare_cpus(unsigned int max_cpus)