Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390: prevent leaking kernel address in BEAR

When userspace executes a syscall or gets interrupted,
BEAR contains a kernel address when returning to userspace.
This makes it pretty easy to figure out where the kernel is
mapped even with KASLR enabled. To fix this, add lpswe to
lowcore and always execute it there, so userspace sees only
the lowcore address of lpswe. For this we have to extend
both cleanup_critical and the SWITCH_ASYNC macro to also check
for lpswe addresses in lowcore.

Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)")
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

authored by

Sven Schnelle and committed by
Vasily Gorbik
0b38b5e1 d2abfbe4

+62 -27
+3 -1
arch/s390/include/asm/lowcore.h
··· 141 141 142 142 /* br %r1 trampoline */ 143 143 __u16 br_r1_trampoline; /* 0x0400 */ 144 - __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ 144 + __u32 return_lpswe; /* 0x0402 */ 145 + __u32 return_mcck_lpswe; /* 0x0406 */ 146 + __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ 145 147 146 148 /* 147 149 * 0xe00 contains the address of the IPL Parameter Information
+1
arch/s390/include/asm/processor.h
··· 162 162 #define INIT_THREAD { \ 163 163 .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ 164 164 .fpu.regs = (void *) init_task.thread.fpu.fprs, \ 165 + .last_break = 1, \ 165 166 } 166 167 167 168 /*
+7
arch/s390/include/asm/setup.h
··· 8 8 9 9 #include <linux/bits.h> 10 10 #include <uapi/asm/setup.h> 11 + #include <linux/build_bug.h> 11 12 12 13 #define EP_OFFSET 0x10008 13 14 #define EP_STRING "S390EP" ··· 161 160 static inline unsigned long kaslr_offset(void) 162 161 { 163 162 return __kaslr_offset; 163 + } 164 + 165 + static inline u32 gen_lpswe(unsigned long addr) 166 + { 167 + BUILD_BUG_ON(addr > 0xfff); 168 + return 0xb2b20000 | addr; 164 169 } 165 170 166 171 #else /* __ASSEMBLY__ */
+2
arch/s390/kernel/asm-offsets.c
··· 124 124 OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); 125 125 OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); 126 126 OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); 127 + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); 128 + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); 127 129 OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); 128 130 OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); 129 131 OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+39 -26
arch/s390/kernel/entry.S
··· 115 115 116 116 .macro SWITCH_ASYNC savearea,timer 117 117 tmhh %r8,0x0001 # interrupting from user ? 118 - jnz 1f 118 + jnz 2f 119 119 lgr %r14,%r9 120 + cghi %r14,__LC_RETURN_LPSWE 121 + je 0f 120 122 slg %r14,BASED(.Lcritical_start) 121 123 clg %r14,BASED(.Lcritical_length) 122 - jhe 0f 124 + jhe 1f 125 + 0: 123 126 lghi %r11,\savearea # inside critical section, do cleanup 124 127 brasl %r14,cleanup_critical 125 128 tmhh %r8,0x0001 # retest problem state after cleanup 126 - jnz 1f 127 - 0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 129 + jnz 2f 130 + 1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 128 131 slgr %r14,%r15 129 132 srag %r14,%r14,STACK_SHIFT 130 - jnz 2f 133 + jnz 3f 131 134 CHECK_STACK \savearea 132 135 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) 133 - j 3f 134 - 1: UPDATE_VTIME %r14,%r15,\timer 136 + j 4f 137 + 2: UPDATE_VTIME %r14,%r15,\timer 135 138 BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 136 - 2: lg %r15,__LC_ASYNC_STACK # load async stack 137 - 3: la %r11,STACK_FRAME_OVERHEAD(%r15) 139 + 3: lg %r15,__LC_ASYNC_STACK # load async stack 140 + 4: la %r11,STACK_FRAME_OVERHEAD(%r15) 138 141 .endm 139 142 140 143 .macro UPDATE_VTIME w1,w2,enter_timer ··· 404 401 stpt __LC_EXIT_TIMER 405 402 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 406 403 lmg %r11,%r15,__PT_R11(%r11) 407 - lpswe __LC_RETURN_PSW 404 + b __LC_RETURN_LPSWE(%r0) 408 405 .Lsysc_done: 409 406 410 407 # ··· 611 608 BPOFF 612 609 stmg %r8,%r15,__LC_SAVE_AREA_SYNC 613 610 lg %r10,__LC_LAST_BREAK 614 - lg %r12,__LC_CURRENT 611 + srag %r11,%r10,12 612 + jnz 0f 613 + /* if __LC_LAST_BREAK is < 4096, it contains one of 614 + * the lpswe addresses in lowcore. Set it to 1 (initial state) 615 + * to prevent leaking that address to userspace. 
616 + */ 617 + lghi %r10,1 618 + 0: lg %r12,__LC_CURRENT 615 619 lghi %r11,0 616 620 larl %r13,cleanup_critical 617 621 lmg %r8,%r9,__LC_PGM_OLD_PSW 618 622 tmhh %r8,0x0001 # test problem state bit 619 - jnz 2f # -> fault in user space 623 + jnz 3f # -> fault in user space 620 624 #if IS_ENABLED(CONFIG_KVM) 621 625 # cleanup critical section for program checks in sie64a 622 626 lgr %r14,%r9 623 627 slg %r14,BASED(.Lsie_critical_start) 624 628 clg %r14,BASED(.Lsie_critical_length) 625 - jhe 0f 629 + jhe 1f 626 630 lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer 627 631 ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE 628 632 lctlg %c1,%c1,__LC_USER_ASCE # load primary asce 629 633 larl %r9,sie_exit # skip forward to sie_exit 630 634 lghi %r11,_PIF_GUEST_FAULT 631 635 #endif 632 - 0: tmhh %r8,0x4000 # PER bit set in old PSW ? 633 - jnz 1f # -> enabled, can't be a double fault 636 + 1: tmhh %r8,0x4000 # PER bit set in old PSW ? 637 + jnz 2f # -> enabled, can't be a double fault 634 638 tm __LC_PGM_ILC+3,0x80 # check for per exception 635 639 jnz .Lpgm_svcper # -> single stepped svc 636 - 1: CHECK_STACK __LC_SAVE_AREA_SYNC 640 + 2: CHECK_STACK __LC_SAVE_AREA_SYNC 637 641 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) 638 - # CHECK_VMAP_STACK branches to stack_overflow or 4f 639 - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f 640 - 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER 642 + # CHECK_VMAP_STACK branches to stack_overflow or 5f 643 + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f 644 + 3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER 641 645 BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 642 646 lg %r15,__LC_KERNEL_STACK 643 647 lgr %r14,%r12 644 648 aghi %r14,__TASK_thread # pointer to thread_struct 645 649 lghi %r13,__LC_PGM_TDB 646 650 tm __LC_PGM_ILC+2,0x02 # check for transaction abort 647 - jz 3f 651 + jz 4f 648 652 mvc __THREAD_trap_tdb(256,%r14),0(%r13) 649 - 3: stg %r10,__THREAD_last_break(%r14) 650 - 4: lgr %r13,%r11 653 + 4: stg %r10,__THREAD_last_break(%r14) 
654 + 5: lgr %r13,%r11 651 655 la %r11,STACK_FRAME_OVERHEAD(%r15) 652 656 stmg %r0,%r7,__PT_R0(%r11) 653 657 # clear user controlled registers to prevent speculative use ··· 673 663 stg %r13,__PT_FLAGS(%r11) 674 664 stg %r10,__PT_ARGS(%r11) 675 665 tm __LC_PGM_ILC+3,0x80 # check for per exception 676 - jz 5f 666 + jz 6f 677 667 tmhh %r8,0x0001 # kernel per event ? 678 668 jz .Lpgm_kprobe 679 669 oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP 680 670 mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS 681 671 mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE 682 672 mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID 683 - 5: REENABLE_IRQS 673 + 6: REENABLE_IRQS 684 674 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) 685 675 larl %r1,pgm_check_table 686 676 llgh %r10,__PT_INT_CODE+2(%r11) ··· 785 775 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 786 776 .Lio_exit_kernel: 787 777 lmg %r11,%r15,__PT_R11(%r11) 788 - lpswe __LC_RETURN_PSW 778 + b __LC_RETURN_LPSWE(%r0) 789 779 .Lio_done: 790 780 791 781 # ··· 1224 1214 stpt __LC_EXIT_TIMER 1225 1215 mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 1226 1216 0: lmg %r11,%r15,__PT_R11(%r11) 1227 - lpswe __LC_RETURN_MCCK_PSW 1217 + b __LC_RETURN_MCCK_LPSWE 1228 1218 1229 1219 .Lmcck_panic: 1230 1220 lg %r15,__LC_NODAT_STACK ··· 1281 1271 #endif 1282 1272 1283 1273 ENTRY(cleanup_critical) 1274 + cghi %r9,__LC_RETURN_LPSWE 1275 + je .Lcleanup_lpswe 1284 1276 #if IS_ENABLED(CONFIG_KVM) 1285 1277 clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap 1286 1278 jl 0f ··· 1436 1424 mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) 1437 1425 mvc 0(64,%r11),__PT_R8(%r9) 1438 1426 lmg %r0,%r7,__PT_R0(%r9) 1427 + .Lcleanup_lpswe: 1439 1428 1: lmg %r8,%r9,__LC_RETURN_PSW 1440 1429 BR_EX %r14,%r11 1441 1430 .Lcleanup_sysc_restore_insn:
+1
arch/s390/kernel/process.c
··· 106 106 p->thread.system_timer = 0; 107 107 p->thread.hardirq_timer = 0; 108 108 p->thread.softirq_timer = 0; 109 + p->thread.last_break = 1; 109 110 110 111 frame->sf.back_chain = 0; 111 112 /* new return point is ret_from_fork */
+3
arch/s390/kernel/setup.c
··· 73 73 #include <asm/nospec-branch.h> 74 74 #include <asm/mem_detect.h> 75 75 #include <asm/uv.h> 76 + #include <asm/asm-offsets.h> 76 77 #include "entry.h" 77 78 78 79 /* ··· 451 450 lc->spinlock_index = 0; 452 451 arch_spin_lock_setup(0); 453 452 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 453 + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); 454 + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); 454 455 455 456 set_prefix((u32)(unsigned long) lc); 456 457 lowcore_ptr[0] = lc;
+2
arch/s390/kernel/smp.c
··· 212 212 lc->spinlock_lockval = arch_spin_lockval(cpu); 213 213 lc->spinlock_index = 0; 214 214 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 215 + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); 216 + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); 215 217 if (nmi_alloc_per_cpu(lc)) 216 218 goto out_async; 217 219 if (vdso_alloc_per_cpu(lc))
+4
arch/s390/mm/vmem.c
··· 415 415 SET_MEMORY_RO | SET_MEMORY_X); 416 416 __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, 417 417 SET_MEMORY_RO | SET_MEMORY_X); 418 + 419 + /* we need lowcore executable for our LPSWE instructions */ 420 + set_memory_x(0, 1); 421 + 418 422 pr_info("Write protected kernel read-only data: %luk\n", 419 423 (unsigned long)(__end_rodata - _stext) >> 10); 420 424 }