Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/wti: Prepare graceful CPU pre-emption on wti reception

When a warning track interrupt is received, the kernel has only a very
limited amount of time to make sure that the CPU can be yielded as
gracefully as possible before being pre-empted by the hypervisor.

The interrupt handler for the wti therefore unparks a kernel thread
which has been created on boot, re-using the CPU hotplug kernel thread
infrastructure. These threads exist per CPU and are assigned the
highest possible real-time priority. This makes sure that said threads
will execute as soon as possible, as the scheduler should pre-empt any
other running user tasks to run the real-time thread.

Furthermore, the interrupt handler disables all I/O interrupts to
prevent additional interrupt processing on the soon-preempted CPU.
Interrupt handlers are likely to take kernel locks, which, in the worst
case, will be kept while the interrupt handler is preempted off its
underlying physical CPU. In that case, all tasks or interrupt handlers
on other CPUs would have to wait for the pre-empted CPU to be
dispatched again. By preventing further interrupt processing, this
risk is minimized.

Once the CPU gets dispatched again, the real-time kernel thread regains
control, reenables interrupts and parks itself again.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Mete Durlu <meted@linux.ibm.com>
Signed-off-by: Tobias Huschle <huschle@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

authored by

Tobias Huschle and committed by
Vasily Gorbik
cafeff5a 2c6c9ccc

+142 -1
+1 -1
arch/s390/kernel/Makefile
··· 43 43 obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o 44 44 obj-y += entry.o reipl.o kdebugfs.o alternative.o 45 45 obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o 46 - obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o 46 + obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o 47 47 48 48 extra-y += vmlinux.lds 49 49
+141
arch/s390/kernel/wti.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Support for warning track interruption 4 + * 5 + * Copyright IBM Corp. 2023 6 + */ 7 + 8 + #include <linux/smpboot.h> 9 + #include <linux/irq.h> 10 + #include <uapi/linux/sched/types.h> 11 + #include <asm/diag.h> 12 + #include <asm/sclp.h> 13 + 14 + struct wti_state { 15 + /* 16 + * Represents the real-time thread responsible to 17 + * acknowledge the warning-track interrupt and trigger 18 + * preliminary and postliminary precautions. 19 + */ 20 + struct task_struct *thread; 21 + /* 22 + * If pending is true, the real-time thread must be scheduled. 23 + * If not, a wake up of that thread will remain a noop. 24 + */ 25 + bool pending; 26 + }; 27 + 28 + static DEFINE_PER_CPU(struct wti_state, wti_state); 29 + 30 + /* 31 + * During a warning-track grace period, interrupts are disabled 32 + * to prevent delays of the warning-track acknowledgment. 33 + * 34 + * Once the CPU is physically dispatched again, interrupts are 35 + * re-enabled. 
36 + */ 37 + 38 + static void wti_irq_disable(void) 39 + { 40 + unsigned long flags; 41 + struct ctlreg cr6; 42 + 43 + local_irq_save(flags); 44 + local_ctl_store(6, &cr6); 45 + /* disable all I/O interrupts */ 46 + cr6.val &= ~0xff000000UL; 47 + local_ctl_load(6, &cr6); 48 + local_irq_restore(flags); 49 + } 50 + 51 + static void wti_irq_enable(void) 52 + { 53 + unsigned long flags; 54 + struct ctlreg cr6; 55 + 56 + local_irq_save(flags); 57 + local_ctl_store(6, &cr6); 58 + /* enable all I/O interrupts */ 59 + cr6.val |= 0xff000000UL; 60 + local_ctl_load(6, &cr6); 61 + local_irq_restore(flags); 62 + } 63 + 64 + static void wti_interrupt(struct ext_code ext_code, 65 + unsigned int param32, unsigned long param64) 66 + { 67 + struct wti_state *st = this_cpu_ptr(&wti_state); 68 + 69 + inc_irq_stat(IRQEXT_WTI); 70 + wti_irq_disable(); 71 + st->pending = true; 72 + wake_up_process(st->thread); 73 + } 74 + 75 + static int wti_pending(unsigned int cpu) 76 + { 77 + struct wti_state *st = per_cpu_ptr(&wti_state, cpu); 78 + 79 + return st->pending; 80 + } 81 + 82 + static void wti_thread_fn(unsigned int cpu) 83 + { 84 + struct wti_state *st = per_cpu_ptr(&wti_state, cpu); 85 + 86 + st->pending = false; 87 + /* 88 + * Yield CPU voluntarily to the hypervisor. Control 89 + * resumes when hypervisor decides to dispatch CPU 90 + * to this LPAR again. 
91 + */ 92 + diag49c(DIAG49C_SUBC_ACK); 93 + wti_irq_enable(); 94 + } 95 + 96 + static struct smp_hotplug_thread wti_threads = { 97 + .store = &wti_state.thread, 98 + .thread_should_run = wti_pending, 99 + .thread_fn = wti_thread_fn, 100 + .thread_comm = "cpuwti/%u", 101 + .selfparking = false, 102 + }; 103 + 104 + static int __init wti_init(void) 105 + { 106 + struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 }; 107 + struct wti_state *st; 108 + int cpu, rc; 109 + 110 + rc = -EOPNOTSUPP; 111 + if (!sclp.has_wti) 112 + goto out; 113 + rc = smpboot_register_percpu_thread(&wti_threads); 114 + if (WARN_ON(rc)) 115 + goto out; 116 + for_each_online_cpu(cpu) { 117 + st = per_cpu_ptr(&wti_state, cpu); 118 + sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param); 119 + } 120 + rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt); 121 + if (rc) { 122 + pr_warn("Couldn't request external interrupt 0x1007\n"); 123 + goto out_thread; 124 + } 125 + irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK); 126 + rc = diag49c(DIAG49C_SUBC_REG); 127 + if (rc) { 128 + pr_warn("Failed to register warning track interrupt through DIAG 49C\n"); 129 + rc = -EOPNOTSUPP; 130 + goto out_subclass; 131 + } 132 + goto out; 133 + out_subclass: 134 + irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK); 135 + unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt); 136 + out_thread: 137 + smpboot_unregister_percpu_thread(&wti_threads); 138 + out: 139 + return rc; 140 + } 141 + late_initcall(wti_init);