Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sparc64: Make montmul/montsqr/mpmul usable in 32-bit threads.

The Montgomery Multiply, Montgomery Square, and Multiple-Precision
Multiply instructions work by loading a combination of the floating
point and multiple register windows worth of integer registers
with the inputs.

These values are 64-bit. But for 32-bit userland processes we only
save the low 32-bits of each integer register during a register spill.
This is because the register window save area is in the user stack and
has a fixed layout.

Therefore, the only way to use these instructions in 32-bit mode is to
perform the following sequence:

1) Load the top-32bits of a chosen integer register with a sentinel,
say "-1". This will be in the outer-most register window.

The idea is that we're trying to see if the outer-most register
window gets spilled, and thus the 64-bit values were truncated.

2) Load all the inputs for the montmul/montsqr/mpmul instruction,
down to the inner-most register window.

3) Execute the opcode.

4) Traverse back up to the outer-most register window.

5) Check the sentinel, if it's still "-1" store the results.
Otherwise retry the entire sequence.

This retry is extremely troublesome. If you're just unlucky and an
interrupt or other trap happens, it'll push that outer-most window to
the stack and clear the sentinel when we restore it.

We could retry forever and never make forward progress if interrupts
arrive at a fast enough rate (consider perf events as one example).
So we have to do limited retries and fall back to software, which is
extremely non-deterministic.

Luckily it's very straightforward to provide a mechanism to let
32-bit applications use a 64-bit stack. Stacks in 64-bit mode are
biased by 2047 bytes, which means that the lowest bit is set in the
actual %sp register value.

So if we see bit zero set in a 32-bit application's stack we treat
it like a 64-bit stack.

Runtime detection of such a facility is tricky, and cumbersome at
best. For example, just trying to use a biased stack and seeing if it
works is hard to recover from (the signal handler will need to use an
alt stack, plus something along the lines of longjmp). Therefore, we
add a system call to report a bitmask of arch specific features like
this in a cheap and less hairy way.

With help from Andy Polyakov.

Signed-off-by: David S. Miller <davem@davemloft.net>

+117 -61
+3 -2
arch/sparc/include/asm/compat.h
··· 232 232 struct pt_regs *regs = current_thread_info()->kregs; 233 233 unsigned long usp = regs->u_regs[UREG_I6]; 234 234 235 - if (!(test_thread_flag(TIF_32BIT))) 235 + if (test_thread_64bit_stack(usp)) 236 236 usp += STACK_BIAS; 237 - else 237 + 238 + if (test_thread_flag(TIF_32BIT)) 238 239 usp &= 0xffffffffUL; 239 240 240 241 usp -= len;
+5
arch/sparc/include/asm/thread_info_64.h
··· 259 259 260 260 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 261 261 262 + #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) 263 + #define test_thread_64bit_stack(__SP) \ 264 + ((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \ 265 + false : true) 266 + 262 267 #endif /* !__ASSEMBLY__ */ 263 268 264 269 #endif /* __KERNEL__ */
+16 -8
arch/sparc/include/asm/ttable.h
··· 372 372 373 373 /* Normal 32bit spill */ 374 374 #define SPILL_2_GENERIC(ASI) \ 375 - srl %sp, 0, %sp; \ 375 + and %sp, 1, %g3; \ 376 + brnz,pn %g3, (. - (128 + 4)); \ 377 + srl %sp, 0, %sp; \ 376 378 stwa %l0, [%sp + %g0] ASI; \ 377 379 mov 0x04, %g3; \ 378 380 stwa %l1, [%sp + %g3] ASI; \ ··· 400 398 stwa %i6, [%g1 + %g0] ASI; \ 401 399 stwa %i7, [%g1 + %g3] ASI; \ 402 400 saved; \ 403 - retry; nop; nop; \ 401 + retry; \ 404 402 b,a,pt %xcc, spill_fixup_dax; \ 405 403 b,a,pt %xcc, spill_fixup_mna; \ 406 404 b,a,pt %xcc, spill_fixup; 407 405 408 406 #define SPILL_2_GENERIC_ETRAP \ 409 407 etrap_user_spill_32bit: \ 410 - srl %sp, 0, %sp; \ 408 + and %sp, 1, %g3; \ 409 + brnz,pn %g3, etrap_user_spill_64bit; \ 410 + srl %sp, 0, %sp; \ 411 411 stwa %l0, [%sp + 0x00] %asi; \ 412 412 stwa %l1, [%sp + 0x04] %asi; \ 413 413 stwa %l2, [%sp + 0x08] %asi; \ ··· 431 427 ba,pt %xcc, etrap_save; \ 432 428 wrpr %g1, %cwp; \ 433 429 nop; nop; nop; nop; \ 434 - nop; nop; nop; nop; \ 430 + nop; nop; \ 435 431 ba,a,pt %xcc, etrap_spill_fixup_32bit; \ 436 432 ba,a,pt %xcc, etrap_spill_fixup_32bit; \ 437 433 ba,a,pt %xcc, etrap_spill_fixup_32bit; ··· 596 592 597 593 /* Normal 32bit fill */ 598 594 #define FILL_2_GENERIC(ASI) \ 599 - srl %sp, 0, %sp; \ 595 + and %sp, 1, %g3; \ 596 + brnz,pn %g3, (. 
- (128 + 4)); \ 597 + srl %sp, 0, %sp; \ 600 598 lduwa [%sp + %g0] ASI, %l0; \ 601 599 mov 0x04, %g2; \ 602 600 mov 0x08, %g3; \ ··· 622 616 lduwa [%g1 + %g3] ASI, %i6; \ 623 617 lduwa [%g1 + %g5] ASI, %i7; \ 624 618 restored; \ 625 - retry; nop; nop; nop; nop; \ 619 + retry; nop; nop; \ 626 620 b,a,pt %xcc, fill_fixup_dax; \ 627 621 b,a,pt %xcc, fill_fixup_mna; \ 628 622 b,a,pt %xcc, fill_fixup; 629 623 630 624 #define FILL_2_GENERIC_RTRAP \ 631 625 user_rtt_fill_32bit: \ 632 - srl %sp, 0, %sp; \ 626 + and %sp, 1, %g3; \ 627 + brnz,pn %g3, user_rtt_fill_64bit; \ 628 + srl %sp, 0, %sp; \ 633 629 lduwa [%sp + 0x00] %asi, %l0; \ 634 630 lduwa [%sp + 0x04] %asi, %l1; \ 635 631 lduwa [%sp + 0x08] %asi, %l2; \ ··· 651 643 ba,pt %xcc, user_rtt_pre_restore; \ 652 644 restored; \ 653 645 nop; nop; nop; nop; nop; \ 654 - nop; nop; nop; nop; nop; \ 646 + nop; nop; nop; \ 655 647 ba,a,pt %xcc, user_rtt_fill_fixup; \ 656 648 ba,a,pt %xcc, user_rtt_fill_fixup; \ 657 649 ba,a,pt %xcc, user_rtt_fill_fixup;
+5 -1
arch/sparc/include/uapi/asm/unistd.h
··· 405 405 #define __NR_setns 337 406 406 #define __NR_process_vm_readv 338 407 407 #define __NR_process_vm_writev 339 408 + #define __NR_kern_features 340 408 409 409 - #define NR_syscalls 340 410 + #define NR_syscalls 341 411 + 412 + /* Bitmask values returned from kern_features system call. */ 413 + #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 410 414 411 415 #ifdef __32bit_syscall_numbers__ 412 416 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
+16 -6
arch/sparc/kernel/perf_event.c
··· 1762 1762 1763 1763 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; 1764 1764 do { 1765 - struct sparc_stackf32 *usf, sf; 1766 1765 unsigned long pc; 1767 1766 1768 - usf = (struct sparc_stackf32 *) ufp; 1769 - if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) 1770 - break; 1767 + if (thread32_stack_is_64bit(ufp)) { 1768 + struct sparc_stackf *usf, sf; 1771 1769 1772 - pc = sf.callers_pc; 1773 - ufp = (unsigned long)sf.fp; 1770 + ufp += STACK_BIAS; 1771 + usf = (struct sparc_stackf *) ufp; 1772 + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) 1773 + break; 1774 + pc = sf.callers_pc & 0xffffffff; 1775 + ufp = ((unsigned long) sf.fp) & 0xffffffff; 1776 + } else { 1777 + struct sparc_stackf32 *usf, sf; 1778 + usf = (struct sparc_stackf32 *) ufp; 1779 + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) 1780 + break; 1781 + pc = sf.callers_pc; 1782 + ufp = (unsigned long)sf.fp; 1783 + } 1774 1784 perf_callchain_store(entry, pc); 1775 1785 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1776 1786 }
+23 -19
arch/sparc/kernel/process_64.c
··· 452 452 /* It's a bit more tricky when 64-bit tasks are involved... */ 453 453 static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) 454 454 { 455 + bool stack_64bit = test_thread_64bit_stack(psp); 455 456 unsigned long fp, distance, rval; 456 457 457 - if (!(test_thread_flag(TIF_32BIT))) { 458 + if (stack_64bit) { 458 459 csp += STACK_BIAS; 459 460 psp += STACK_BIAS; 460 461 __get_user(fp, &(((struct reg_window __user *)psp)->ins[6])); 461 462 fp += STACK_BIAS; 463 + if (test_thread_flag(TIF_32BIT)) 464 + fp &= 0xffffffff; 462 465 } else 463 466 __get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6])); 464 467 ··· 475 472 rval = (csp - distance); 476 473 if (copy_in_user((void __user *) rval, (void __user *) psp, distance)) 477 474 rval = 0; 478 - else if (test_thread_flag(TIF_32BIT)) { 475 + else if (!stack_64bit) { 479 476 if (put_user(((u32)csp), 480 477 &(((struct reg_window32 __user *)rval)->ins[6]))) 481 478 rval = 0; ··· 510 507 511 508 flush_user_windows(); 512 509 if ((window = get_thread_wsaved()) != 0) { 513 - int winsize = sizeof(struct reg_window); 514 - int bias = 0; 515 - 516 - if (test_thread_flag(TIF_32BIT)) 517 - winsize = sizeof(struct reg_window32); 518 - else 519 - bias = STACK_BIAS; 520 - 521 510 window -= 1; 522 511 do { 523 - unsigned long sp = (t->rwbuf_stkptrs[window] + bias); 524 512 struct reg_window *rwin = &t->reg_window[window]; 513 + int winsize = sizeof(struct reg_window); 514 + unsigned long sp; 515 + 516 + sp = t->rwbuf_stkptrs[window]; 517 + 518 + if (test_thread_64bit_stack(sp)) 519 + sp += STACK_BIAS; 520 + else 521 + winsize = sizeof(struct reg_window32); 525 522 526 523 if (!copy_to_user((char __user *)sp, rwin, winsize)) { 527 524 shift_window_buffer(window, get_thread_wsaved() - 1, t); ··· 547 544 { 548 545 struct thread_info *t = current_thread_info(); 549 546 unsigned long window; 550 - int winsize = sizeof(struct reg_window); 551 - int bias = 0; 552 - 553 - if 
(test_thread_flag(TIF_32BIT)) 554 - winsize = sizeof(struct reg_window32); 555 - else 556 - bias = STACK_BIAS; 557 547 558 548 flush_user_windows(); 559 549 window = get_thread_wsaved(); ··· 554 558 if (likely(window != 0)) { 555 559 window -= 1; 556 560 do { 557 - unsigned long sp = (t->rwbuf_stkptrs[window] + bias); 558 561 struct reg_window *rwin = &t->reg_window[window]; 562 + int winsize = sizeof(struct reg_window); 563 + unsigned long sp; 564 + 565 + sp = t->rwbuf_stkptrs[window]; 566 + 567 + if (test_thread_64bit_stack(sp)) 568 + sp += STACK_BIAS; 569 + else 570 + winsize = sizeof(struct reg_window32); 559 571 560 572 if (unlikely(sp & 0x7UL)) 561 573 stack_unaligned(sp);
+2 -2
arch/sparc/kernel/ptrace_64.c
··· 151 151 { 152 152 unsigned long rw_addr = regs->u_regs[UREG_I6]; 153 153 154 - if (test_tsk_thread_flag(current, TIF_32BIT)) { 154 + if (!test_thread_64bit_stack(rw_addr)) { 155 155 struct reg_window32 win32; 156 156 int i; 157 157 ··· 176 176 { 177 177 unsigned long rw_addr = regs->u_regs[UREG_I6]; 178 178 179 - if (test_tsk_thread_flag(current, TIF_32BIT)) { 179 + if (!test_thread_64bit_stack(rw_addr)) { 180 180 struct reg_window32 win32; 181 181 int i; 182 182
+5
arch/sparc/kernel/sys_sparc_64.c
··· 751 751 : "cc"); 752 752 return __res; 753 753 } 754 + 755 + asmlinkage long sys_kern_features(void) 756 + { 757 + return KERN_FEATURE_MIXED_MODE_STACK; 758 + }
+2
arch/sparc/kernel/systbls_64.S
··· 86 86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init 87 87 /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime 88 88 .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev 89 + /*340*/ .word sys_kern_features 89 90 90 91 #endif /* CONFIG_COMPAT */ 91 92 ··· 164 163 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 165 164 /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 166 165 .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev 166 + /*340*/ .word sys_kern_features
+23 -13
arch/sparc/kernel/unaligned_64.c
··· 113 113 114 114 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) 115 115 { 116 - unsigned long value; 116 + unsigned long value, fp; 117 117 118 118 if (reg < 16) 119 119 return (!reg ? 0 : regs->u_regs[reg]); 120 + 121 + fp = regs->u_regs[UREG_FP]; 122 + 120 123 if (regs->tstate & TSTATE_PRIV) { 121 124 struct reg_window *win; 122 - win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); 125 + win = (struct reg_window *)(fp + STACK_BIAS); 123 126 value = win->locals[reg - 16]; 124 - } else if (test_thread_flag(TIF_32BIT)) { 127 + } else if (!test_thread_64bit_stack(fp)) { 125 128 struct reg_window32 __user *win32; 126 - win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 129 + win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 127 130 get_user(value, &win32->locals[reg - 16]); 128 131 } else { 129 132 struct reg_window __user *win; 130 - win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 133 + win = (struct reg_window __user *)(fp + STACK_BIAS); 131 134 get_user(value, &win->locals[reg - 16]); 132 135 } 133 136 return value; ··· 138 135 139 136 static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) 140 137 { 138 + unsigned long fp; 139 + 141 140 if (reg < 16) 142 141 return &regs->u_regs[reg]; 142 + 143 + fp = regs->u_regs[UREG_FP]; 144 + 143 145 if (regs->tstate & TSTATE_PRIV) { 144 146 struct reg_window *win; 145 - win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); 147 + win = (struct reg_window *)(fp + STACK_BIAS); 146 148 return &win->locals[reg - 16]; 147 - } else if (test_thread_flag(TIF_32BIT)) { 149 + } else if (!test_thread_64bit_stack(fp)) { 148 150 struct reg_window32 *win32; 149 - win32 = (struct reg_window32 *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 151 + win32 = (struct reg_window32 *)((unsigned long)((u32)fp)); 150 152 return (unsigned long *)&win32->locals[reg - 16]; 151 153 } else { 152 154 struct 
reg_window *win; 153 - win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); 155 + win = (struct reg_window *)(fp + STACK_BIAS); 154 156 return &win->locals[reg - 16]; 155 157 } 156 158 } ··· 400 392 if (rd) 401 393 regs->u_regs[rd] = ret; 402 394 } else { 403 - if (test_thread_flag(TIF_32BIT)) { 395 + unsigned long fp = regs->u_regs[UREG_FP]; 396 + 397 + if (!test_thread_64bit_stack(fp)) { 404 398 struct reg_window32 __user *win32; 405 - win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 399 + win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 406 400 put_user(ret, &win32->locals[rd - 16]); 407 401 } else { 408 402 struct reg_window __user *win; 409 - win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 403 + win = (struct reg_window __user *)(fp + STACK_BIAS); 410 404 put_user(ret, &win->locals[rd - 16]); 411 405 } 412 406 } ··· 564 554 reg[0] = 0; 565 555 if ((insn & 0x780000) == 0x180000) 566 556 reg[1] = 0; 567 - } else if (test_thread_flag(TIF_32BIT)) { 557 + } else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) { 568 558 put_user(0, (int __user *) reg); 569 559 if ((insn & 0x780000) == 0x180000) 570 560 put_user(0, ((int __user *) reg) + 1);
+14 -9
arch/sparc/kernel/visemul.c
··· 149 149 150 150 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) 151 151 { 152 - unsigned long value; 152 + unsigned long value, fp; 153 153 154 154 if (reg < 16) 155 155 return (!reg ? 0 : regs->u_regs[reg]); 156 + 157 + fp = regs->u_regs[UREG_FP]; 158 + 156 159 if (regs->tstate & TSTATE_PRIV) { 157 160 struct reg_window *win; 158 - win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); 161 + win = (struct reg_window *)(fp + STACK_BIAS); 159 162 value = win->locals[reg - 16]; 160 - } else if (test_thread_flag(TIF_32BIT)) { 163 + } else if (!test_thread_64bit_stack(fp)) { 161 164 struct reg_window32 __user *win32; 162 - win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 165 + win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 163 166 get_user(value, &win32->locals[reg - 16]); 164 167 } else { 165 168 struct reg_window __user *win; 166 - win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 169 + win = (struct reg_window __user *)(fp + STACK_BIAS); 167 170 get_user(value, &win->locals[reg - 16]); 168 171 } 169 172 return value; ··· 175 172 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, 176 173 struct pt_regs *regs) 177 174 { 175 + unsigned long fp = regs->u_regs[UREG_FP]; 176 + 178 177 BUG_ON(reg < 16); 179 178 BUG_ON(regs->tstate & TSTATE_PRIV); 180 179 181 - if (test_thread_flag(TIF_32BIT)) { 180 + if (!test_thread_64bit_stack(fp)) { 182 181 struct reg_window32 __user *win32; 183 - win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); 182 + win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); 184 183 return (unsigned long __user *)&win32->locals[reg - 16]; 185 184 } else { 186 185 struct reg_window __user *win; 187 - win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); 186 + win = (struct reg_window __user *)(fp + STACK_BIAS); 188 187 return &win->locals[reg - 16]; 189 188 
} 190 189 } ··· 209 204 } else { 210 205 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); 211 206 212 - if (test_thread_flag(TIF_32BIT)) 207 + if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) 213 208 __put_user((u32)val, (u32 __user *)rd_user); 214 209 else 215 210 __put_user(val, rd_user);
+2
arch/sparc/kernel/winfixup.S
··· 43 43 spill_fixup_dax: 44 44 TRAP_LOAD_THREAD_REG(%g6, %g1) 45 45 ldx [%g6 + TI_FLAGS], %g1 46 + andcc %sp, 0x1, %g0 47 + movne %icc, 0, %g1 46 48 andcc %g1, _TIF_32BIT, %g0 47 49 ldub [%g6 + TI_WSAVED], %g1 48 50 sll %g1, 3, %g3
+1 -1
arch/sparc/math-emu/math_64.c
··· 320 320 XR = 0; 321 321 else if (freg < 16) 322 322 XR = regs->u_regs[freg]; 323 - else if (test_thread_flag(TIF_32BIT)) { 323 + else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) { 324 324 struct reg_window32 __user *win32; 325 325 flushw_user (); 326 326 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));