Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390: add support for BEAR enhancement facility

The Breaking-Event-Address-Register (BEAR) stores the address of the
last breaking-event instruction. Breaking events are usually instructions
that change the program flow — for example, branches — and instructions
that modify the address in the PSW, such as lpswe. This is useful for
debugging wild branches, because one can easily figure out where the
wild branch originated.

What is problematic is that lpswe is considered a breaking event, and
therefore overwrites BEAR on kernel exit. The BEAR enhancement facility
adds new instructions that allow saving/restoring BEAR, as well as an
lpswey instruction that doesn't cause a breaking event. So we can save
BEAR on kernel entry and restore it on exit to user space.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

authored by

Sven Schnelle and committed by
Vasily Gorbik
3b051e89 5d17d4ed

+87 -24
+3
arch/s390/include/asm/cpu.h
··· 12 12 #ifndef __ASSEMBLY__ 13 13 14 14 #include <linux/types.h> 15 + #include <linux/jump_label.h> 15 16 16 17 struct cpuid 17 18 { ··· 21 20 unsigned int machine : 16; 22 21 unsigned int unused : 16; 23 22 } __attribute__ ((packed, aligned(8))); 23 + 24 + DECLARE_STATIC_KEY_FALSE(cpu_has_bear); 24 25 25 26 #endif /* __ASSEMBLY__ */ 26 27 #endif /* _ASM_S390_CPU_H */
+4 -3
arch/s390/include/asm/lowcore.h
··· 93 93 psw_t return_psw; /* 0x0290 */ 94 94 psw_t return_mcck_psw; /* 0x02a0 */ 95 95 96 + __u64 last_break; /* 0x02b0 */ 97 + 96 98 /* CPU accounting and timing values. */ 97 - __u64 sys_enter_timer; /* 0x02b0 */ 98 - __u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */ 99 + __u64 sys_enter_timer; /* 0x02b8 */ 99 100 __u64 mcck_enter_timer; /* 0x02c0 */ 100 101 __u64 exit_timer; /* 0x02c8 */ 101 102 __u64 user_timer; /* 0x02d0 */ ··· 189 188 __u32 tod_progreg_save_area; /* 0x1324 */ 190 189 __u32 cpu_timer_save_area[2]; /* 0x1328 */ 191 190 __u32 clock_comp_save_area[2]; /* 0x1330 */ 192 - __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ 191 + __u64 last_break_save_area; /* 0x1338 */ 193 192 __u32 access_regs_save_area[16]; /* 0x1340 */ 194 193 __u64 cregs_save_area[16]; /* 0x1380 */ 195 194 __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+3
arch/s390/kernel/asm-offsets.c
··· 35 35 OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); 36 36 OFFSET(__PT_FLAGS, pt_regs, flags); 37 37 OFFSET(__PT_CR1, pt_regs, cr1); 38 + OFFSET(__PT_LAST_BREAK, pt_regs, last_break); 38 39 DEFINE(__PT_SIZE, sizeof(struct pt_regs)); 39 40 BLANK(); 40 41 /* stack_frame offsets */ ··· 128 127 OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); 129 128 OFFSET(__LC_GMAP, lowcore, gmap); 130 129 OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); 130 + OFFSET(__LC_LAST_BREAK, lowcore, last_break); 131 131 /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ 132 132 OFFSET(__LC_DUMP_REIPL, lowcore, ipib); 133 133 /* hardware defined lowcore locations 0x1000 - 0x18ff */ ··· 142 140 OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); 143 141 OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); 144 142 OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); 143 + OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area); 145 144 OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); 146 145 OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); 147 146 OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
+37 -8
arch/s390/kernel/entry.S
··· 52 52 53 53 _LPP_OFFSET = __LC_LPP 54 54 55 + .macro STBEAR address 56 + ALTERNATIVE "", ".insn s,0xb2010000,\address", 193 57 + .endm 58 + 59 + .macro LBEAR address 60 + ALTERNATIVE "", ".insn s,0xb2000000,\address", 193 61 + .endm 62 + 63 + .macro LPSWEY address,lpswe 64 + ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193 65 + .endm 66 + 67 + .macro MBEAR reg 68 + ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 69 + .endm 70 + 55 71 .macro CHECK_STACK savearea 56 72 #ifdef CONFIG_CHECK_STACK 57 73 tml %r15,STACK_SIZE - CONFIG_STACK_GUARD ··· 318 302 BPOFF 319 303 lghi %r14,0 320 304 .Lsysc_per: 305 + STBEAR __LC_LAST_BREAK 321 306 lctlg %c1,%c1,__LC_KERNEL_ASCE 322 307 lg %r12,__LC_CURRENT 323 308 lg %r15,__LC_KERNEL_STACK ··· 338 321 xgr %r11,%r11 339 322 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs 340 323 mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC 324 + MBEAR %r2 341 325 lgr %r3,%r14 342 326 brasl %r14,__do_syscall 343 327 lctlg %c1,%c1,__LC_USER_ASCE 344 328 mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) 345 329 BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP 330 + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) 346 331 lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) 347 332 stpt __LC_EXIT_TIMER 348 - b __LC_RETURN_LPSWE 333 + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE 349 334 ENDPROC(system_call) 350 335 351 336 # ··· 359 340 lctlg %c1,%c1,__LC_USER_ASCE 360 341 mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) 361 342 BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP 343 + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) 362 344 lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) 363 345 stpt __LC_EXIT_TIMER 364 - b __LC_RETURN_LPSWE 346 + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE 365 347 ENDPROC(ret_from_fork) 366 348 367 349 /* ··· 402 382 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) 403 383 stmg %r0,%r7,__PT_R0(%r11) 404 384 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC 385 + mvc 
__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK 405 386 stmg %r8,%r9,__PT_PSW(%r11) 406 387 407 388 # clear user controlled registers to prevent speculative use ··· 422 401 stpt __LC_EXIT_TIMER 423 402 .Lpgm_exit_kernel: 424 403 mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) 404 + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) 425 405 lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) 426 - b __LC_RETURN_LPSWE 406 + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE 427 407 428 408 # 429 409 # single stepped system call ··· 434 412 larl %r14,.Lsysc_per 435 413 stg %r14,__LC_RETURN_PSW+8 436 414 lghi %r14,1 437 - lpswe __LC_RETURN_PSW # branch to .Lsysc_per 415 + LBEAR __LC_PGM_LAST_BREAK 416 + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per 438 417 ENDPROC(pgm_check_handler) 439 418 440 419 /* ··· 445 422 ENTRY(\name) 446 423 STCK __LC_INT_CLOCK 447 424 stpt __LC_SYS_ENTER_TIMER 425 + STBEAR __LC_LAST_BREAK 448 426 BPOFF 449 427 stmg %r8,%r15,__LC_SAVE_AREA_ASYNC 450 428 lg %r12,__LC_CURRENT ··· 477 453 xgr %r10,%r10 478 454 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) 479 455 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC 456 + MBEAR %r11 480 457 stmg %r8,%r9,__PT_PSW(%r11) 481 458 tm %r8,0x0001 # coming from user space? 
482 459 jno 1f ··· 490 465 lctlg %c1,%c1,__LC_USER_ASCE 491 466 BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP 492 467 stpt __LC_EXIT_TIMER 493 - 2: lmg %r0,%r15,__PT_R0(%r11) 494 - b __LC_RETURN_LPSWE 468 + 2: LBEAR __PT_LAST_BREAK(%r11) 469 + lmg %r0,%r15,__PT_R0(%r11) 470 + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE 495 471 ENDPROC(\name) 496 472 .endm 497 473 ··· 531 505 BPOFF 532 506 la %r1,4095 # validate r1 533 507 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer 508 + LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear 534 509 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs 535 510 lg %r12,__LC_CURRENT 536 511 lmg %r8,%r9,__LC_MCK_OLD_PSW ··· 618 591 jno 0f 619 592 BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP 620 593 stpt __LC_EXIT_TIMER 621 - 0: lmg %r11,%r15,__PT_R11(%r11) 622 - b __LC_RETURN_MCCK_LPSWE 594 + 0: ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 595 + LBEAR 0(%r12) 596 + lmg %r11,%r15,__PT_R11(%r11) 597 + LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE 623 598 624 599 .Lmcck_panic: 625 600 /*
+8 -2
arch/s390/kernel/irq.c
··· 140 140 141 141 irq_enter(); 142 142 143 - if (user_mode(regs)) 143 + if (user_mode(regs)) { 144 144 update_timer_sys(); 145 + if (static_branch_likely(&cpu_has_bear)) 146 + current->thread.last_break = regs->last_break; 147 + } 145 148 146 149 from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; 147 150 if (from_idle) ··· 174 171 175 172 irq_enter(); 176 173 177 - if (user_mode(regs)) 174 + if (user_mode(regs)) { 178 175 update_timer_sys(); 176 + if (static_branch_likely(&cpu_has_bear)) 177 + current->thread.last_break = regs->last_break; 178 + } 179 179 180 180 regs->int_code = S390_lowcore.ext_int_code_addr; 181 181 regs->int_parm = S390_lowcore.ext_params;
+1 -1
arch/s390/kernel/process.c
··· 141 141 frame->childregs.gprs[10] = arg; 142 142 frame->childregs.gprs[11] = (unsigned long)do_exit; 143 143 frame->childregs.orig_gpr2 = -1; 144 - 144 + frame->childregs.last_break = 1; 145 145 return 0; 146 146 } 147 147 frame->childregs = *current_pt_regs();
+5
arch/s390/kernel/setup.c
··· 174 174 struct lowcore *lowcore_ptr[NR_CPUS]; 175 175 EXPORT_SYMBOL(lowcore_ptr); 176 176 177 + DEFINE_STATIC_KEY_FALSE(cpu_has_bear); 178 + 177 179 /* 178 180 * The Write Back bit position in the physaddr is given by the SLPC PCI. 179 181 * Leaving the mask zero always uses write through which is safe ··· 1039 1037 numa_setup(); 1040 1038 smp_detect_cpus(); 1041 1039 topology_init_early(); 1040 + 1041 + if (test_facility(193)) 1042 + static_branch_enable(&cpu_has_bear); 1042 1043 1043 1044 /* 1044 1045 * Create kernel page tables and switch to virtual addressing.
+2
arch/s390/kernel/syscall.c
··· 154 154 regs->psw = S390_lowcore.svc_old_psw; 155 155 regs->int_code = S390_lowcore.svc_int_code; 156 156 update_timer_sys(); 157 + if (static_branch_likely(&cpu_has_bear)) 158 + current->thread.last_break = regs->last_break; 157 159 158 160 local_irq_enable(); 159 161 regs->orig_gpr2 = regs->gprs[2];
+5 -5
arch/s390/kernel/traps.c
··· 300 300 301 301 void noinstr __do_pgm_check(struct pt_regs *regs) 302 302 { 303 - unsigned long last_break = S390_lowcore.pgm_last_break; 304 303 unsigned int trapnr; 305 304 irqentry_state_t state; 306 305 ··· 310 311 311 312 if (user_mode(regs)) { 312 313 update_timer_sys(); 313 - if (last_break < 4096) 314 - last_break = 1; 315 - current->thread.last_break = last_break; 316 - regs->last_break = last_break; 314 + if (!static_branch_likely(&cpu_has_bear)) { 315 + if (regs->last_break < 4096) 316 + regs->last_break = 1; 317 + } 318 + current->thread.last_break = regs->last_break; 317 319 } 318 320 319 321 if (S390_lowcore.pgm_code & 0x0200) {
+11 -3
arch/s390/mm/dump_pagetables.c
··· 8 8 #include <linux/kasan.h> 9 9 #include <asm/ptdump.h> 10 10 #include <asm/kasan.h> 11 + #include <asm/nospec-branch.h> 11 12 #include <asm/sections.h> 12 13 13 14 static unsigned long max_addr; ··· 117 116 return; 118 117 if (st->current_prot & _PAGE_NOEXEC) 119 118 return; 120 - /* The first lowcore page is currently still W+X. */ 121 - if (addr == PAGE_SIZE) 119 + /* 120 + * The first lowcore page is W+X if spectre mitigations are using 121 + * trampolines or the BEAR enhancements facility is not installed, 122 + * in which case we have two lpswe instructions in lowcore that need 123 + * to be executable. 124 + */ 125 + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) 122 126 return; 123 127 WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n", 124 128 (void *)st->start_address); ··· 209 203 if (st.wx_pages) 210 204 pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); 211 205 else 212 - pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n"); 206 + pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", 207 + (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? 208 + "unexpected " : ""); 213 209 } 214 210 #endif /* CONFIG_DEBUG_WX */ 215 211
+8 -2
arch/s390/mm/vmem.c
··· 13 13 #include <linux/hugetlb.h> 14 14 #include <linux/slab.h> 15 15 #include <asm/cacheflush.h> 16 + #include <asm/nospec-branch.h> 16 17 #include <asm/pgalloc.h> 17 18 #include <asm/setup.h> 18 19 #include <asm/tlbflush.h> ··· 585 584 __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, 586 585 SET_MEMORY_RO | SET_MEMORY_X); 587 586 588 - /* we need lowcore executable for our LPSWE instructions */ 589 - set_memory_x(0, 1); 587 + if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) { 588 + /* 589 + * Lowcore must be executable for LPSWE 590 + * and expoline trampoline branch instructions. 591 + */ 592 + set_memory_x(0, 1); 593 + } 590 594 591 595 pr_info("Write protected kernel read-only data: %luk\n", 592 596 (unsigned long)(__end_rodata - _stext) >> 10);