Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "powerpc/rtas: Implement reentrant rtas call"

At the time this was submitted by Leonardo, I confirmed -- or thought
I had confirmed -- with PowerVM partition firmware development that
the following RTAS functions:

- ibm,get-xive
- ibm,int-off
- ibm,int-on
- ibm,set-xive

were safe to call on multiple CPUs simultaneously, not only with
respect to themselves as indicated by PAPR, but with arbitrary other
RTAS calls:

https://lore.kernel.org/linuxppc-dev/875zcy2v8o.fsf@linux.ibm.com/

Recent discussion with firmware development makes it clear that this
is not true, and that the code in commit b664db8e3f97 ("powerpc/rtas:
Implement reentrant rtas call") is unsafe, likely explaining several
strange bugs we've seen in internal testing involving DLPAR and
LPM. These scenarios use ibm,configure-connector, whose internal state
can be corrupted by the concurrent use of the "reentrant" functions,
leading to symptoms like endless busy statuses from RTAS.

Fixes: b664db8e3f97 ("powerpc/rtas: Implement reentrant rtas call")
Cc: stable@vger.kernel.org # v5.8+
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Laurent Dufour <laurent.dufour@fr.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220907220111.223267-1-nathanl@linux.ibm.com

authored by

Nathan Lynch and committed by
Michael Ellerman
f88aabad 71a92e99

+11 -99
-1
arch/powerpc/include/asm/paca.h
··· 263 263 u64 l1d_flush_size; 264 264 #endif 265 265 #ifdef CONFIG_PPC_PSERIES 266 - struct rtas_args *rtas_args_reentrant; 267 266 u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ 268 267 #endif /* CONFIG_PPC_PSERIES */ 269 268
-1
arch/powerpc/include/asm/rtas.h
··· 240 240 extern int rtas_token(const char *service); 241 241 extern int rtas_service_present(const char *service); 242 242 extern int rtas_call(int token, int, int, int *, ...); 243 - int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...); 244 243 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, 245 244 int nret, ...); 246 245 extern void __noreturn rtas_restart(char *cmd);
-32
arch/powerpc/kernel/paca.c
··· 16 16 #include <asm/kexec.h> 17 17 #include <asm/svm.h> 18 18 #include <asm/ultravisor.h> 19 - #include <asm/rtas.h> 20 19 21 20 #include "setup.h" 22 21 ··· 169 170 } 170 171 #endif /* CONFIG_PPC_64S_HASH_MMU */ 171 172 172 - #ifdef CONFIG_PPC_PSERIES 173 - /** 174 - * new_rtas_args() - Allocates rtas args 175 - * @cpu: CPU number 176 - * @limit: Memory limit for this allocation 177 - * 178 - * Allocates a struct rtas_args and return it's pointer, 179 - * if not in Hypervisor mode 180 - * 181 - * Return: Pointer to allocated rtas_args 182 - * NULL if CPU in Hypervisor Mode 183 - */ 184 - static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) 185 - { 186 - limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); 187 - 188 - if (early_cpu_has_feature(CPU_FTR_HVMODE)) 189 - return NULL; 190 - 191 - return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, 192 - limit, cpu); 193 - } 194 - #endif /* CONFIG_PPC_PSERIES */ 195 - 196 173 /* The Paca is an array with one entry per processor. Each contains an 197 174 * lppaca, which contains the information shared between the 198 175 * hypervisor and Linux. ··· 206 231 #ifdef CONFIG_PPC_BOOK3E 207 232 /* For now -- if we have threads this will be adjusted later */ 208 233 new_paca->tcd_ptr = &new_paca->tcd; 209 - #endif 210 - 211 - #ifdef CONFIG_PPC_PSERIES 212 - new_paca->rtas_args_reentrant = NULL; 213 234 #endif 214 235 } 215 236 ··· 278 307 #endif 279 308 #ifdef CONFIG_PPC_64S_HASH_MMU 280 309 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); 281 - #endif 282 - #ifdef CONFIG_PPC_PSERIES 283 - paca->rtas_args_reentrant = new_rtas_args(cpu, limit); 284 310 #endif 285 311 paca_struct_size += sizeof(struct paca_struct); 286 312 }
-54
arch/powerpc/kernel/rtas.c
··· 43 43 #include <asm/time.h> 44 44 #include <asm/mmu.h> 45 45 #include <asm/topology.h> 46 - #include <asm/paca.h> 47 46 48 47 /* This is here deliberately so it's only used in this file */ 49 48 void enter_rtas(unsigned long); ··· 930 931 if (fwrc) 931 932 pr_err("ibm,activate-firmware failed (%i)\n", fwrc); 932 933 } 933 - 934 - #ifdef CONFIG_PPC_PSERIES 935 - /** 936 - * rtas_call_reentrant() - Used for reentrant rtas calls 937 - * @token: Token for desired reentrant RTAS call 938 - * @nargs: Number of Input Parameters 939 - * @nret: Number of Output Parameters 940 - * @outputs: Array of outputs 941 - * @...: Inputs for desired RTAS call 942 - * 943 - * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", 944 - * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. 945 - * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but 946 - * PACA one instead. 947 - * 948 - * Return: -1 on error, 949 - * First output value of RTAS call if (nret > 0), 950 - * 0 otherwise, 951 - */ 952 - int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) 953 - { 954 - va_list list; 955 - struct rtas_args *args; 956 - unsigned long flags; 957 - int i, ret = 0; 958 - 959 - if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) 960 - return -1; 961 - 962 - local_irq_save(flags); 963 - preempt_disable(); 964 - 965 - /* We use the per-cpu (PACA) rtas args buffer */ 966 - args = local_paca->rtas_args_reentrant; 967 - 968 - va_start(list, outputs); 969 - va_rtas_call_unlocked(args, token, nargs, nret, list); 970 - va_end(list); 971 - 972 - if (nret > 1 && outputs) 973 - for (i = 0; i < nret - 1; ++i) 974 - outputs[i] = be32_to_cpu(args->rets[i + 1]); 975 - 976 - if (nret > 0) 977 - ret = be32_to_cpu(args->rets[0]); 978 - 979 - local_irq_restore(flags); 980 - preempt_enable(); 981 - 982 - return ret; 983 - } 984 - 985 - #endif /* CONFIG_PPC_PSERIES */ 986 934 987 935 /** 988 936 * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+11 -11
arch/powerpc/sysdev/xics/ics-rtas.c
··· 36 36 37 37 server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); 38 38 39 - call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, 40 - server, DEFAULT_PRIORITY); 39 + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, 40 + DEFAULT_PRIORITY); 41 41 if (call_status != 0) { 42 42 printk(KERN_ERR 43 43 "%s: ibm_set_xive irq %u server %x returned %d\n", ··· 46 46 } 47 47 48 48 /* Now unmask the interrupt (often a no-op) */ 49 - call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq); 49 + call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); 50 50 if (call_status != 0) { 51 51 printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", 52 52 __func__, hw_irq, call_status); ··· 68 68 if (hw_irq == XICS_IPI) 69 69 return; 70 70 71 - call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq); 71 + call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); 72 72 if (call_status != 0) { 73 73 printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", 74 74 __func__, hw_irq, call_status); ··· 76 76 } 77 77 78 78 /* Have to set XIVE to 0xff to be able to remove a slot */ 79 - call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, 80 - xics_default_server, 0xff); 79 + call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, 80 + xics_default_server, 0xff); 81 81 if (call_status != 0) { 82 82 printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", 83 83 __func__, hw_irq, call_status); ··· 108 108 if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) 109 109 return -1; 110 110 111 - status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq); 111 + status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); 112 112 113 113 if (status) { 114 114 printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", ··· 126 126 pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq, 127 127 hw_irq, irq_server); 128 128 129 - status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, 130 - hw_irq, irq_server, xics_status[1]); 129 + status = rtas_call(ibm_set_xive, 3, 1, NULL, 130 + hw_irq, irq_server, xics_status[1]); 131 131 132 132 if (status) { 133 133 printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", ··· 158 158 return -EINVAL; 159 159 160 160 /* Check if RTAS knows about this interrupt */ 161 - rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq); 161 + rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); 162 162 if (rc) 163 163 return -ENXIO; 164 164 ··· 174 174 { 175 175 int rc, status[2]; 176 176 177 - rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec); 177 + rc = rtas_call(ibm_get_xive, 1, 3, status, vec); 178 178 if (rc) 179 179 return -1; 180 180 return status[0];