Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/pvops/msr: Refactor pv_cpu_ops.write_msr{,_safe}()

An MSR value is represented as a 64-bit unsigned integer, with existing
MSR instructions storing it in EDX:EAX as two 32-bit segments.

The new immediate form MSR instructions, however, utilize a 64-bit
general-purpose register to store the MSR value. To unify the usage of
all MSR instructions, let the default MSR access APIs accept an MSR
value as a single 64-bit argument instead of two 32-bit segments.

The dual 32-bit APIs are still available as convenient wrappers over the
APIs that handle an MSR value as a single 64-bit argument.

The following illustrates the updated derivation of the MSR write APIs:

            __wrmsrq(u32 msr, u64 val)
              /                  \
             /                    \
  native_wrmsrq(msr, val)    native_wrmsr(msr, low, high)
            |
            |
  native_write_msr(msr, val)
        /          \
       /            \
wrmsrq(msr, val)   wrmsr(msr, low, high)

When CONFIG_PARAVIRT is enabled, wrmsrq() and wrmsr() are defined on top
of paravirt_write_msr():

   paravirt_write_msr(u32 msr, u64 val)
        /          \
       /            \
wrmsrq(msr, val)   wrmsr(msr, low, high)

paravirt_write_msr() invokes cpu.write_msr(msr, val), an indirect layer
of pv_ops MSR write call:

If on native:

cpu.write_msr = native_write_msr

If on Xen:

cpu.write_msr = xen_write_msr

Therefore, refactor pv_cpu_ops.write_msr{,_safe}() to accept an MSR value
in a single u64 argument, replacing the current dual u32 arguments.

No functional change intended.

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://lore.kernel.org/r/20250427092027.1598740-14-xin@zytor.com

Authored by: Xin Li (Intel)
Committed by: Ingo Molnar
Commit: 0c2678ef  Parent: 2b7e2530

+46 -67
+15 -20
arch/x86/include/asm/msr.h
··· 75 75 return EAX_EDX_VAL(val, low, high); 76 76 } 77 77 78 - static __always_inline void __wrmsr(u32 msr, u32 low, u32 high) 78 + static __always_inline void __wrmsrq(u32 msr, u64 val) 79 79 { 80 80 asm volatile("1: wrmsr\n" 81 81 "2:\n" 82 82 _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) 83 - : : "c" (msr), "a"(low), "d" (high) : "memory"); 83 + : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)) : "memory"); 84 84 } 85 85 86 86 #define native_rdmsr(msr, val1, val2) \ ··· 96 96 } 97 97 98 98 #define native_wrmsr(msr, low, high) \ 99 - __wrmsr(msr, low, high) 99 + __wrmsrq((msr), (u64)(high) << 32 | (low)) 100 100 101 101 #define native_wrmsrq(msr, val) \ 102 - __wrmsr((msr), (u32)((u64)(val)), \ 103 - (u32)((u64)(val) >> 32)) 102 + __wrmsrq((msr), (val)) 104 103 105 104 static inline u64 native_read_msr(u32 msr) 106 105 { ··· 128 129 } 129 130 130 131 /* Can be uninlined because referenced by paravirt */ 131 - static inline void notrace 132 - native_write_msr(u32 msr, u32 low, u32 high) 132 + static inline void notrace native_write_msr(u32 msr, u64 val) 133 133 { 134 - u64 val = (u64)high << 32 | low; 135 - 136 134 native_wrmsrq(msr, val); 137 135 138 136 if (tracepoint_enabled(write_msr)) ··· 137 141 } 138 142 139 143 /* Can be uninlined because referenced by paravirt */ 140 - static inline int notrace 141 - native_write_msr_safe(u32 msr, u32 low, u32 high) 144 + static inline int notrace native_write_msr_safe(u32 msr, u64 val) 142 145 { 143 146 int err; 144 147 ··· 145 150 "2:\n\t" 146 151 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) 147 152 : [err] "=a" (err) 148 - : "c" (msr), "0" (low), "d" (high) 153 + : "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32)) 149 154 : "memory"); 150 155 if (tracepoint_enabled(write_msr)) 151 - do_trace_write_msr(msr, ((u64)high << 32 | low), err); 156 + do_trace_write_msr(msr, val, err); 152 157 return err; 153 158 } 154 159 ··· 184 189 185 190 static inline void wrmsr(u32 msr, u32 low, u32 high) 186 191 { 187 - 
native_write_msr(msr, low, high); 192 + native_write_msr(msr, (u64)high << 32 | low); 188 193 } 189 194 190 195 #define rdmsrq(msr, val) \ ··· 192 197 193 198 static inline void wrmsrq(u32 msr, u64 val) 194 199 { 195 - native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); 200 + native_write_msr(msr, val); 196 201 } 197 202 198 203 /* wrmsr with exception handling */ 199 - static inline int wrmsr_safe(u32 msr, u32 low, u32 high) 204 + static inline int wrmsrq_safe(u32 msr, u64 val) 200 205 { 201 - return native_write_msr_safe(msr, low, high); 206 + return native_write_msr_safe(msr, val); 202 207 } 203 208 204 209 /* rdmsr with exception handling */ ··· 242 247 } 243 248 244 249 /* 245 - * 64-bit version of wrmsr_safe(): 250 + * Dual u32 version of wrmsrq_safe(): 246 251 */ 247 - static inline int wrmsrq_safe(u32 msr, u64 val) 252 + static inline int wrmsr_safe(u32 msr, u32 low, u32 high) 248 253 { 249 - return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); 254 + return wrmsrq_safe(msr, (u64)high << 32 | low); 250 255 } 251 256 252 257 struct msr __percpu *msrs_alloc(void);
+14 -13
arch/x86/include/asm/paravirt.h
··· 180 180 return PVOP_CALL1(u64, cpu.read_msr, msr); 181 181 } 182 182 183 - static inline void paravirt_write_msr(unsigned msr, 184 - unsigned low, unsigned high) 183 + static inline void paravirt_write_msr(u32 msr, u64 val) 185 184 { 186 - PVOP_VCALL3(cpu.write_msr, msr, low, high); 185 + PVOP_VCALL2(cpu.write_msr, msr, val); 187 186 } 188 187 189 188 static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) ··· 190 191 return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); 191 192 } 192 193 193 - static inline int paravirt_write_msr_safe(unsigned msr, 194 - unsigned low, unsigned high) 194 + static inline int paravirt_write_msr_safe(u32 msr, u64 val) 195 195 { 196 - return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); 196 + return PVOP_CALL2(int, cpu.write_msr_safe, msr, val); 197 197 } 198 198 199 199 #define rdmsr(msr, val1, val2) \ ··· 202 204 val2 = _l >> 32; \ 203 205 } while (0) 204 206 205 - #define wrmsr(msr, val1, val2) \ 206 - do { \ 207 - paravirt_write_msr(msr, val1, val2); \ 208 - } while (0) 207 + static __always_inline void wrmsr(u32 msr, u32 low, u32 high) 208 + { 209 + paravirt_write_msr(msr, (u64)high << 32 | low); 210 + } 209 211 210 212 #define rdmsrq(msr, val) \ 211 213 do { \ 212 214 val = paravirt_read_msr(msr); \ 213 215 } while (0) 214 216 215 - static inline void wrmsrq(unsigned msr, u64 val) 217 + static inline void wrmsrq(u32 msr, u64 val) 216 218 { 217 - wrmsr(msr, (u32)val, (u32)(val>>32)); 219 + paravirt_write_msr(msr, val); 218 220 } 219 221 220 - #define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) 222 + static inline int wrmsrq_safe(u32 msr, u64 val) 223 + { 224 + return paravirt_write_msr_safe(msr, val); 225 + } 221 226 222 227 /* rdmsr with exception handling */ 223 228 #define rdmsr_safe(msr, a, b) \
+2 -2
arch/x86/include/asm/paravirt_types.h
··· 92 92 93 93 /* Unsafe MSR operations. These will warn or panic on failure. */ 94 94 u64 (*read_msr)(unsigned int msr); 95 - void (*write_msr)(unsigned int msr, unsigned low, unsigned high); 95 + void (*write_msr)(u32 msr, u64 val); 96 96 97 97 /* 98 98 * Safe MSR operations. 99 99 * read sets err to 0 or -EIO. write returns 0 or -EIO. 100 100 */ 101 101 u64 (*read_msr_safe)(unsigned int msr, int *err); 102 - int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high); 102 + int (*write_msr_safe)(u32 msr, u64 val); 103 103 104 104 u64 (*read_pmc)(int counter); 105 105
+1 -1
arch/x86/kernel/kvmclock.c
··· 196 196 void kvmclock_disable(void) 197 197 { 198 198 if (msr_kvm_system_time) 199 - native_write_msr(msr_kvm_system_time, 0, 0); 199 + native_write_msr(msr_kvm_system_time, 0); 200 200 } 201 201 202 202 static void __init kvmclock_init_mem(void)
+3 -12
arch/x86/kvm/svm/svm.c
··· 476 476 477 477 static void svm_init_erratum_383(void) 478 478 { 479 - u32 low, high; 480 479 int err; 481 480 u64 val; 482 481 ··· 489 490 490 491 val |= (1ULL << 47); 491 492 492 - low = lower_32_bits(val); 493 - high = upper_32_bits(val); 494 - 495 - native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); 493 + native_write_msr_safe(MSR_AMD64_DC_CFG, val); 496 494 497 495 erratum_383_found = true; 498 496 } ··· 2164 2168 2165 2169 /* Clear MCi_STATUS registers */ 2166 2170 for (i = 0; i < 6; ++i) 2167 - native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); 2171 + native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0); 2168 2172 2169 2173 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); 2170 2174 if (!err) { 2171 - u32 low, high; 2172 - 2173 2175 value &= ~(1ULL << 2); 2174 - low = lower_32_bits(value); 2175 - high = upper_32_bits(value); 2176 - 2177 - native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); 2176 + native_write_msr_safe(MSR_IA32_MCG_STATUS, value); 2178 2177 } 2179 2178 2180 2179 /* Flush tlb to evict multi-match entries */
+11 -19
arch/x86/xen/enlighten_pv.c
··· 1111 1111 return val; 1112 1112 } 1113 1113 1114 - static void set_seg(u32 which, u32 low, u32 high) 1114 + static void set_seg(u32 which, u64 base) 1115 1115 { 1116 - u64 base = ((u64)high << 32) | low; 1117 - 1118 1116 if (HYPERVISOR_set_segment_base(which, base)) 1119 1117 WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); 1120 1118 } ··· 1122 1124 * With err == NULL write_msr() semantics are selected. 1123 1125 * Supplying an err pointer requires err to be pre-initialized with 0. 1124 1126 */ 1125 - static void xen_do_write_msr(unsigned int msr, unsigned int low, 1126 - unsigned int high, int *err) 1127 + static void xen_do_write_msr(u32 msr, u64 val, int *err) 1127 1128 { 1128 - u64 val; 1129 - 1130 1129 switch (msr) { 1131 1130 case MSR_FS_BASE: 1132 - set_seg(SEGBASE_FS, low, high); 1131 + set_seg(SEGBASE_FS, val); 1133 1132 break; 1134 1133 1135 1134 case MSR_KERNEL_GS_BASE: 1136 - set_seg(SEGBASE_GS_USER, low, high); 1135 + set_seg(SEGBASE_GS_USER, val); 1137 1136 break; 1138 1137 1139 1138 case MSR_GS_BASE: 1140 - set_seg(SEGBASE_GS_KERNEL, low, high); 1139 + set_seg(SEGBASE_GS_KERNEL, val); 1141 1140 break; 1142 1141 1143 1142 case MSR_STAR: ··· 1150 1155 break; 1151 1156 1152 1157 default: 1153 - val = (u64)high << 32 | low; 1154 - 1155 1158 if (pmu_msr_chk_emulated(msr, &val, false)) 1156 1159 return; 1157 1160 1158 1161 if (err) 1159 - *err = native_write_msr_safe(msr, low, high); 1162 + *err = native_write_msr_safe(msr, val); 1160 1163 else 1161 - native_write_msr(msr, low, high); 1164 + native_write_msr(msr, val); 1162 1165 } 1163 1166 } 1164 1167 ··· 1165 1172 return xen_do_read_msr(msr, err); 1166 1173 } 1167 1174 1168 - static int xen_write_msr_safe(unsigned int msr, unsigned int low, 1169 - unsigned int high) 1175 + static int xen_write_msr_safe(u32 msr, u64 val) 1170 1176 { 1171 1177 int err = 0; 1172 1178 1173 - xen_do_write_msr(msr, low, high, &err); 1179 + xen_do_write_msr(msr, val, &err); 1174 1180 1175 1181 return err; 
1176 1182 } ··· 1181 1189 return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); 1182 1190 } 1183 1191 1184 - static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) 1192 + static void xen_write_msr(u32 msr, u64 val) 1185 1193 { 1186 1194 int err; 1187 1195 1188 - xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); 1196 + xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL); 1189 1197 } 1190 1198 1191 1199 /* This is called once we have the cpu_possible_mask */