Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: Replace __get_cpu_var uses

__get_cpu_var() is used for multiple purposes in the kernel source. One
of them is address calculation via the form &__get_cpu_var(x). This
calculates the address for the instance of the percpu variable of the
current processor based on an offset.

Other use cases are for storing and retrieving data from the current
processor's percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.

__get_cpu_var() is defined as:

#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.

This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less
registers are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.

The patch set includes passes over all arches as well. Once these
operations are used throughout then specialized macros can be defined in
non-x86 arches as well in order to optimize per cpu access by f.e. using
a global register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);

Converts to

int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);

Converts to

int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu
variable.

DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y);

Converts to

int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);

Converts to

memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

DEFINE_PER_CPU(int, y);
__get_cpu_var(y) = x;

Converts to

__this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++

Converts to

__this_cpu_inc(y)

Signed-off-by: Christoph Lameter <cl@linux.com>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org> [compilation only]
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Christoph Lameter and committed by
Linus Torvalds
c473b2c6 504e0e2f

+23 -23
+2 -2
arch/sh/kernel/hw_breakpoint.c
··· 52 52 int i; 53 53 54 54 for (i = 0; i < sh_ubc->num_events; i++) { 55 - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); 55 + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 56 56 57 57 if (!*slot) { 58 58 *slot = bp; ··· 84 84 int i; 85 85 86 86 for (i = 0; i < sh_ubc->num_events; i++) { 87 - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); 87 + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); 88 88 89 89 if (*slot == bp) { 90 90 *slot = NULL;
+15 -15
arch/sh/kernel/kprobes.c
··· 102 102 103 103 void __kprobes arch_remove_kprobe(struct kprobe *p) 104 104 { 105 - struct kprobe *saved = &__get_cpu_var(saved_next_opcode); 105 + struct kprobe *saved = this_cpu_ptr(&saved_next_opcode); 106 106 107 107 if (saved->addr) { 108 108 arch_disarm_kprobe(p); ··· 111 111 saved->addr = NULL; 112 112 saved->opcode = 0; 113 113 114 - saved = &__get_cpu_var(saved_next_opcode2); 114 + saved = this_cpu_ptr(&saved_next_opcode2); 115 115 if (saved->addr) { 116 116 arch_disarm_kprobe(saved); 117 117 ··· 129 129 130 130 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 131 131 { 132 - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 132 + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); 133 133 kcb->kprobe_status = kcb->prev_kprobe.status; 134 134 } 135 135 136 136 static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 137 137 struct kprobe_ctlblk *kcb) 138 138 { 139 - __get_cpu_var(current_kprobe) = p; 139 + __this_cpu_write(current_kprobe, p); 140 140 } 141 141 142 142 /* ··· 146 146 */ 147 147 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 148 148 { 149 - __get_cpu_var(saved_current_opcode).addr = (kprobe_opcode_t *)regs->pc; 149 + __this_cpu_write(saved_current_opcode.addr, (kprobe_opcode_t *)regs->pc); 150 150 151 151 if (p != NULL) { 152 152 struct kprobe *op1, *op2; 153 153 154 154 arch_disarm_kprobe(p); 155 155 156 - op1 = &__get_cpu_var(saved_next_opcode); 157 - op2 = &__get_cpu_var(saved_next_opcode2); 156 + op1 = this_cpu_ptr(&saved_next_opcode); 157 + op2 = this_cpu_ptr(&saved_next_opcode2); 158 158 159 159 if (OPCODE_JSR(p->opcode) || OPCODE_JMP(p->opcode)) { 160 160 unsigned int reg_nr = ((p->opcode >> 8) & 0x000F); ··· 249 249 kcb->kprobe_status = KPROBE_REENTER; 250 250 return 1; 251 251 } else { 252 - p = __get_cpu_var(current_kprobe); 252 + p = __this_cpu_read(current_kprobe); 253 253 if (p->break_handler && p->break_handler(p, regs)) { 254 254 goto 
ss_probe; 255 255 } ··· 336 336 continue; 337 337 338 338 if (ri->rp && ri->rp->handler) { 339 - __get_cpu_var(current_kprobe) = &ri->rp->kp; 339 + __this_cpu_write(current_kprobe, &ri->rp->kp); 340 340 ri->rp->handler(ri, regs); 341 - __get_cpu_var(current_kprobe) = NULL; 341 + __this_cpu_write(current_kprobe, NULL); 342 342 } 343 343 344 344 orig_ret_address = (unsigned long)ri->ret_addr; ··· 383 383 cur->post_handler(cur, regs, 0); 384 384 } 385 385 386 - p = &__get_cpu_var(saved_next_opcode); 386 + p = this_cpu_ptr(&saved_next_opcode); 387 387 if (p->addr) { 388 388 arch_disarm_kprobe(p); 389 389 p->addr = NULL; 390 390 p->opcode = 0; 391 391 392 - addr = __get_cpu_var(saved_current_opcode).addr; 393 - __get_cpu_var(saved_current_opcode).addr = NULL; 392 + addr = __this_cpu_read(saved_current_opcode.addr); 393 + __this_cpu_write(saved_current_opcode.addr, NULL); 394 394 395 395 p = get_kprobe(addr); 396 396 arch_arm_kprobe(p); 397 397 398 - p = &__get_cpu_var(saved_next_opcode2); 398 + p = this_cpu_ptr(&saved_next_opcode2); 399 399 if (p->addr) { 400 400 arch_disarm_kprobe(p); 401 401 p->addr = NULL; ··· 511 511 if (kprobe_handler(args->regs)) { 512 512 ret = NOTIFY_STOP; 513 513 } else { 514 - p = __get_cpu_var(current_kprobe); 514 + p = __this_cpu_read(current_kprobe); 515 515 if (p->break_handler && 516 516 p->break_handler(p, args->regs)) 517 517 ret = NOTIFY_STOP;
+1 -1
arch/sh/kernel/localtimer.c
··· 32 32 */ 33 33 void local_timer_interrupt(void) 34 34 { 35 - struct clock_event_device *clk = &__get_cpu_var(local_clockevent); 35 + struct clock_event_device *clk = this_cpu_ptr(&local_clockevent); 36 36 37 37 irq_enter(); 38 38 clk->event_handler(clk);
+4 -4
arch/sh/kernel/perf_event.c
··· 227 227 228 228 static void sh_pmu_stop(struct perf_event *event, int flags) 229 229 { 230 - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 230 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 231 231 struct hw_perf_event *hwc = &event->hw; 232 232 int idx = hwc->idx; 233 233 ··· 245 245 246 246 static void sh_pmu_start(struct perf_event *event, int flags) 247 247 { 248 - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 248 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 249 249 struct hw_perf_event *hwc = &event->hw; 250 250 int idx = hwc->idx; 251 251 ··· 262 262 263 263 static void sh_pmu_del(struct perf_event *event, int flags) 264 264 { 265 - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 265 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 266 266 267 267 sh_pmu_stop(event, PERF_EF_UPDATE); 268 268 __clear_bit(event->hw.idx, cpuc->used_mask); ··· 272 272 273 273 static int sh_pmu_add(struct perf_event *event, int flags) 274 274 { 275 - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 275 + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 276 276 struct hw_perf_event *hwc = &event->hw; 277 277 int idx = hwc->idx; 278 278 int ret = -EAGAIN;
+1 -1
arch/sh/kernel/smp.c
··· 111 111 irq_ctx_exit(raw_smp_processor_id()); 112 112 mb(); 113 113 114 - __get_cpu_var(cpu_state) = CPU_DEAD; 114 + __this_cpu_write(cpu_state, CPU_DEAD); 115 115 local_irq_disable(); 116 116 } 117 117