Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390: Replace __get_cpu_var uses

__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.

Other use cases are for storing and retrieving data from the current
processor's percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.

__get_cpu_var() is defined as :

#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.

This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and fewer registers
are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so
the macro is removed too.

The patch set includes passes over all arches as well. Once these operations
are used throughout, specialized macros can be defined in non-x86
arches as well in order to optimize per cpu access, e.g. by using a global
register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);

Converts to

int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);

Converts to

int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu
variable.

DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)

Converts to

int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);

Converts to

memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;

Converts to

this_cpu_write(y, x);

6. Increment/Decrement etc of a per cpu variable

DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++

Converts to

this_cpu_inc(y)

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
CC: linux390@de.ibm.com
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

authored by

Christoph Lameter and committed by
Tejun Heo
eb7e7d76 35898716

+48 -44
+1 -1
arch/s390/include/asm/cputime.h
··· 184 184 185 185 static inline int s390_nohz_delay(int cpu) 186 186 { 187 - return __get_cpu_var(s390_idle).nohz_delay != 0; 187 + return __this_cpu_read(s390_idle.nohz_delay) != 0; 188 188 } 189 189 190 190 #define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+1 -1
arch/s390/include/asm/irq.h
··· 81 81 82 82 static __always_inline void inc_irq_stat(enum interruption_class irq) 83 83 { 84 - __get_cpu_var(irq_stat).irqs[irq]++; 84 + __this_cpu_inc(irq_stat.irqs[irq]); 85 85 } 86 86 87 87 struct ext_code {
+8 -8
arch/s390/include/asm/percpu.h
··· 31 31 pcp_op_T__ old__, new__, prev__; \ 32 32 pcp_op_T__ *ptr__; \ 33 33 preempt_disable(); \ 34 - ptr__ = __this_cpu_ptr(&(pcp)); \ 34 + ptr__ = raw_cpu_ptr(&(pcp)); \ 35 35 prev__ = *ptr__; \ 36 36 do { \ 37 37 old__ = prev__; \ ··· 70 70 pcp_op_T__ val__ = (val); \ 71 71 pcp_op_T__ old__, *ptr__; \ 72 72 preempt_disable(); \ 73 - ptr__ = __this_cpu_ptr(&(pcp)); \ 73 + ptr__ = raw_cpu_ptr(&(pcp)); \ 74 74 if (__builtin_constant_p(val__) && \ 75 75 ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ 76 76 asm volatile( \ ··· 97 97 pcp_op_T__ val__ = (val); \ 98 98 pcp_op_T__ old__, *ptr__; \ 99 99 preempt_disable(); \ 100 - ptr__ = __this_cpu_ptr(&(pcp)); \ 100 + ptr__ = raw_cpu_ptr(&(pcp)); \ 101 101 asm volatile( \ 102 102 op " %[old__],%[val__],%[ptr__]\n" \ 103 103 : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ ··· 116 116 pcp_op_T__ val__ = (val); \ 117 117 pcp_op_T__ old__, *ptr__; \ 118 118 preempt_disable(); \ 119 - ptr__ = __this_cpu_ptr(&(pcp)); \ 119 + ptr__ = raw_cpu_ptr(&(pcp)); \ 120 120 asm volatile( \ 121 121 op " %[old__],%[val__],%[ptr__]\n" \ 122 122 : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ ··· 138 138 pcp_op_T__ ret__; \ 139 139 pcp_op_T__ *ptr__; \ 140 140 preempt_disable(); \ 141 - ptr__ = __this_cpu_ptr(&(pcp)); \ 141 + ptr__ = raw_cpu_ptr(&(pcp)); \ 142 142 ret__ = cmpxchg(ptr__, oval, nval); \ 143 143 preempt_enable(); \ 144 144 ret__; \ ··· 154 154 typeof(pcp) *ptr__; \ 155 155 typeof(pcp) ret__; \ 156 156 preempt_disable(); \ 157 - ptr__ = __this_cpu_ptr(&(pcp)); \ 157 + ptr__ = raw_cpu_ptr(&(pcp)); \ 158 158 ret__ = xchg(ptr__, nval); \ 159 159 preempt_enable(); \ 160 160 ret__; \ ··· 173 173 typeof(pcp2) *p2__; \ 174 174 int ret__; \ 175 175 preempt_disable(); \ 176 - p1__ = __this_cpu_ptr(&(pcp1)); \ 177 - p2__ = __this_cpu_ptr(&(pcp2)); \ 176 + p1__ = raw_cpu_ptr(&(pcp1)); \ 177 + p2__ = raw_cpu_ptr(&(pcp2)); \ 178 178 ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ 179 179 preempt_enable(); \ 180 180 
ret__; \
+1 -1
arch/s390/kernel/irq.c
··· 258 258 259 259 ext_code = *(struct ext_code *) &regs->int_code; 260 260 if (ext_code.code != EXT_IRQ_CLK_COMP) 261 - __get_cpu_var(s390_idle).nohz_delay = 1; 261 + __this_cpu_write(s390_idle.nohz_delay, 1); 262 262 263 263 index = ext_hash(ext_code.code); 264 264 rcu_read_lock();
+4 -4
arch/s390/kernel/kprobes.c
··· 366 366 */ 367 367 static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) 368 368 { 369 - kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); 369 + kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe); 370 370 kcb->prev_kprobe.status = kcb->kprobe_status; 371 - __get_cpu_var(current_kprobe) = p; 371 + __this_cpu_write(current_kprobe, p); 372 372 } 373 373 374 374 /* ··· 378 378 */ 379 379 static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) 380 380 { 381 - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 381 + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); 382 382 kcb->kprobe_status = kcb->prev_kprobe.status; 383 383 } 384 384 ··· 459 459 enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); 460 460 return 1; 461 461 } else if (kprobe_running()) { 462 - p = __get_cpu_var(current_kprobe); 462 + p = __this_cpu_read(current_kprobe); 463 463 if (p->break_handler && p->break_handler(p, regs)) { 464 464 /* 465 465 * Continuation after the jprobe completed and
+7 -3
arch/s390/kernel/nmi.c
··· 53 53 */ 54 54 local_irq_save(flags); 55 55 local_mcck_disable(); 56 - mcck = __get_cpu_var(cpu_mcck); 57 - memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); 56 + /* 57 + * Ummm... Does this make sense at all? Copying the percpu struct 58 + * and then zapping it one statement later? 59 + */ 60 + memcpy(&mcck, this_cpu_ptr(&cpu_mcck), sizeof(mcck)); 61 + memset(&mcck, 0, sizeof(struct mcck_struct)); 58 62 clear_cpu_flag(CIF_MCCK_PENDING); 59 63 local_mcck_enable(); 60 64 local_irq_restore(flags); ··· 257 253 nmi_enter(); 258 254 inc_irq_stat(NMI_NMI); 259 255 mci = (struct mci *) &S390_lowcore.mcck_interruption_code; 260 - mcck = &__get_cpu_var(cpu_mcck); 256 + mcck = this_cpu_ptr(&cpu_mcck); 261 257 umode = user_mode(regs); 262 258 263 259 if (mci->sd) {
+11 -11
arch/s390/kernel/perf_cpum_cf.c
··· 173 173 */ 174 174 static void cpumf_pmu_enable(struct pmu *pmu) 175 175 { 176 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 176 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 177 177 int err; 178 178 179 179 if (cpuhw->flags & PMU_F_ENABLED) ··· 196 196 */ 197 197 static void cpumf_pmu_disable(struct pmu *pmu) 198 198 { 199 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 199 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 200 200 int err; 201 201 u64 inactive; 202 202 ··· 230 230 return; 231 231 232 232 inc_irq_stat(IRQEXT_CMC); 233 - cpuhw = &__get_cpu_var(cpu_hw_events); 233 + cpuhw = this_cpu_ptr(&cpu_hw_events); 234 234 235 235 /* Measurement alerts are shared and might happen when the PMU 236 236 * is not reserved. Ignore these alerts in this case. */ ··· 250 250 #define PMC_RELEASE 1 251 251 static void setup_pmc_cpu(void *flags) 252 252 { 253 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 253 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 254 254 255 255 switch (*((int *) flags)) { 256 256 case PMC_INIT: ··· 475 475 476 476 static void cpumf_pmu_start(struct perf_event *event, int flags) 477 477 { 478 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 478 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 479 479 struct hw_perf_event *hwc = &event->hw; 480 480 481 481 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) ··· 506 506 507 507 static void cpumf_pmu_stop(struct perf_event *event, int flags) 508 508 { 509 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 509 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 510 510 struct hw_perf_event *hwc = &event->hw; 511 511 512 512 if (!(hwc->state & PERF_HES_STOPPED)) { ··· 527 527 528 528 static int cpumf_pmu_add(struct perf_event *event, int flags) 529 529 { 530 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 530 + struct cpu_hw_events *cpuhw = 
this_cpu_ptr(&cpu_hw_events); 531 531 532 532 /* Check authorization for the counter set to which this 533 533 * counter belongs. ··· 551 551 552 552 static void cpumf_pmu_del(struct perf_event *event, int flags) 553 553 { 554 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 554 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 555 555 556 556 cpumf_pmu_stop(event, PERF_EF_UPDATE); 557 557 ··· 575 575 */ 576 576 static void cpumf_pmu_start_txn(struct pmu *pmu) 577 577 { 578 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 578 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 579 579 580 580 perf_pmu_disable(pmu); 581 581 cpuhw->flags |= PERF_EVENT_TXN; ··· 589 589 */ 590 590 static void cpumf_pmu_cancel_txn(struct pmu *pmu) 591 591 { 592 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 592 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 593 593 594 594 WARN_ON(cpuhw->tx_state != cpuhw->state); 595 595 ··· 604 604 */ 605 605 static int cpumf_pmu_commit_txn(struct pmu *pmu) 606 606 { 607 - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 607 + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); 608 608 u64 state; 609 609 610 610 /* check if the updated state can be scheduled */
+8 -8
arch/s390/kernel/perf_cpum_sf.c
··· 562 562 static void setup_pmc_cpu(void *flags) 563 563 { 564 564 int err; 565 - struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); 565 + struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); 566 566 567 567 err = 0; 568 568 switch (*((int *) flags)) { ··· 849 849 850 850 static void cpumsf_pmu_enable(struct pmu *pmu) 851 851 { 852 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 852 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 853 853 struct hw_perf_event *hwc; 854 854 int err; 855 855 ··· 898 898 899 899 static void cpumsf_pmu_disable(struct pmu *pmu) 900 900 { 901 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 901 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 902 902 struct hws_lsctl_request_block inactive; 903 903 struct hws_qsi_info_block si; 904 904 int err; ··· 1306 1306 */ 1307 1307 static void cpumsf_pmu_start(struct perf_event *event, int flags) 1308 1308 { 1309 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 1309 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 1310 1310 1311 1311 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) 1312 1312 return; ··· 1327 1327 */ 1328 1328 static void cpumsf_pmu_stop(struct perf_event *event, int flags) 1329 1329 { 1330 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 1330 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 1331 1331 1332 1332 if (event->hw.state & PERF_HES_STOPPED) 1333 1333 return; ··· 1346 1346 1347 1347 static int cpumsf_pmu_add(struct perf_event *event, int flags) 1348 1348 { 1349 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 1349 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 1350 1350 int err; 1351 1351 1352 1352 if (cpuhw->flags & PMU_F_IN_USE) ··· 1397 1397 1398 1398 static void cpumsf_pmu_del(struct perf_event *event, int flags) 1399 1399 { 1400 - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); 1400 + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); 1401 1401 1402 1402 perf_pmu_disable(event->pmu); 1403 1403 
cpumsf_pmu_stop(event, PERF_EF_UPDATE); ··· 1470 1470 if (!(alert & CPU_MF_INT_SF_MASK)) 1471 1471 return; 1472 1472 inc_irq_stat(IRQEXT_CMS); 1473 - cpuhw = &__get_cpu_var(cpu_hw_sf); 1473 + cpuhw = this_cpu_ptr(&cpu_hw_sf); 1474 1474 1475 1475 /* Measurement alerts are shared and might happen when the PMU 1476 1476 * is not reserved. Ignore these alerts in this case. */
+2 -2
arch/s390/kernel/processor.c
··· 23 23 */ 24 24 void cpu_init(void) 25 25 { 26 - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); 27 - struct cpuid *id = &__get_cpu_var(cpu_id); 26 + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); 27 + struct cpuid *id = this_cpu_ptr(&cpu_id); 28 28 29 29 get_cpu_id(id); 30 30 atomic_inc(&init_mm.mm_count);
+3 -3
arch/s390/kernel/time.c
··· 92 92 struct clock_event_device *cd; 93 93 94 94 S390_lowcore.clock_comparator = -1ULL; 95 - cd = &__get_cpu_var(comparators); 95 + cd = this_cpu_ptr(&comparators); 96 96 cd->event_handler(cd); 97 97 } 98 98 ··· 360 360 */ 361 361 static void disable_sync_clock(void *dummy) 362 362 { 363 - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); 363 + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); 364 364 /* 365 365 * Clear the in-sync bit 2^31. All get_sync_clock calls will 366 366 * fail until the sync bit is turned back on. In addition ··· 377 377 */ 378 378 static void enable_sync_clock(void) 379 379 { 380 - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); 380 + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); 381 381 atomic_set_mask(0x80000000, sw_ptr); 382 382 } 383 383
+1 -1
arch/s390/kernel/vtime.c
··· 154 154 155 155 void __kprobes vtime_stop_cpu(void) 156 156 { 157 - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); 157 + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); 158 158 unsigned long long idle_time; 159 159 unsigned long psw_mask; 160 160
+1 -1
arch/s390/oprofile/hwsampler.c
··· 178 178 static void hws_ext_handler(struct ext_code ext_code, 179 179 unsigned int param32, unsigned long param64) 180 180 { 181 - struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); 181 + struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); 182 182 183 183 if (!(param32 & CPU_MF_INT_SF_MASK)) 184 184 return;