Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: Move over to dynamically allocated FPU context.

This follows the x86 xstate changes and implements a task_xstate slab
cache that is dynamically sized to match one of hard FP/soft FP/FPU-less.

This also tidies up and consolidates some of the SH-2A/SH-4 FPU
fragmentation. Now fpu state restorers are commonly defined, with the
init_fpu()/fpu_init() mess reworked to follow the x86 convention.
The fpu_init() register initialization has been replaced by xstate setup
followed by writing out to hardware via the standard restore path.

As init_fpu() now performs a slab allocation, a secondary lighter-weight
restorer is also introduced for the context switch path.

In the future the DSP state will be rolled in here, too.

More work remains for math emulation and the SH-5 FPU, which presently
uses its own special (UP-only) interfaces.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

+292 -285
+12 -23
arch/sh/include/asm/fpu.h
··· 2 2 #define __ASM_SH_FPU_H 3 3 4 4 #ifndef __ASSEMBLY__ 5 - #include <linux/preempt.h> 6 - #include <asm/ptrace.h> 5 + 6 + struct task_struct; 7 7 8 8 #ifdef CONFIG_SH_FPU 9 9 static inline void release_fpu(struct pt_regs *regs) ··· 16 16 regs->sr &= ~SR_FD; 17 17 } 18 18 19 - struct task_struct; 20 - 21 19 extern void save_fpu(struct task_struct *__tsk); 22 - void fpu_state_restore(struct pt_regs *regs); 20 + extern void restore_fpu(struct task_struct *__tsk); 21 + extern void fpu_state_restore(struct pt_regs *regs); 22 + extern void __fpu_state_restore(void); 23 23 #else 24 - 25 - #define save_fpu(tsk) do { } while (0) 26 - #define release_fpu(regs) do { } while (0) 27 - #define grab_fpu(regs) do { } while (0) 28 - #define fpu_state_restore(regs) do { } while (0) 29 - 24 + #define save_fpu(tsk) do { } while (0) 25 + #define restore_fpu(tsk) do { } while (0) 26 + #define release_fpu(regs) do { } while (0) 27 + #define grab_fpu(regs) do { } while (0) 28 + #define fpu_state_restore(regs) do { } while (0) 29 + #define __fpu_state_restore(regs) do { } while (0) 30 30 #endif 31 31 32 32 struct user_regset; 33 33 34 34 extern int do_fpu_inst(unsigned short, struct pt_regs *); 35 + extern int init_fpu(struct task_struct *); 35 36 36 37 extern int fpregs_get(struct task_struct *target, 37 38 const struct user_regset *regset, ··· 64 63 release_fpu(regs); 65 64 } 66 65 preempt_enable(); 67 - } 68 - 69 - static inline int init_fpu(struct task_struct *tsk) 70 - { 71 - if (tsk_used_math(tsk)) { 72 - if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current) 73 - unlazy_fpu(tsk, task_pt_regs(tsk)); 74 - return 0; 75 - } 76 - 77 - set_stopped_child_used_math(tsk); 78 - return 0; 79 66 } 80 67 81 68 #endif /* __ASSEMBLY__ */
+10 -6
arch/sh/include/asm/processor_32.h
··· 90 90 unsigned long entry_pc; 91 91 }; 92 92 93 - union sh_fpu_union { 94 - struct sh_fpu_hard_struct hard; 95 - struct sh_fpu_soft_struct soft; 93 + union thread_xstate { 94 + struct sh_fpu_hard_struct hardfpu; 95 + struct sh_fpu_soft_struct softfpu; 96 96 }; 97 + 98 + extern unsigned int xstate_size; 99 + extern void free_thread_xstate(struct task_struct *); 100 + extern struct kmem_cache *task_xstate_cachep; 97 101 98 102 struct thread_struct { 99 103 /* Saved registers when thread is descheduled */ ··· 107 103 /* Hardware debugging registers */ 108 104 unsigned long ubc_pc; 109 105 110 - /* floating point info */ 111 - union sh_fpu_union fpu; 112 - 113 106 #ifdef CONFIG_SH_DSP 114 107 /* Dsp status information */ 115 108 struct sh_dsp_struct dsp_status; 116 109 #endif 110 + 111 + /* Extended processor state */ 112 + union thread_xstate *xstate; 117 113 }; 118 114 119 115 /* Count of active tasks with UBC settings */
+4
arch/sh/include/asm/thread_info.h
··· 97 97 98 98 extern struct thread_info *alloc_thread_info(struct task_struct *tsk); 99 99 extern void free_thread_info(struct thread_info *ti); 100 + extern void arch_task_cache_init(void); 101 + #define arch_task_cache_init arch_task_cache_init 102 + extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 103 + extern void init_thread_xstate(void); 100 104 101 105 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR 102 106
+2
arch/sh/kernel/cpu/Makefile
··· 17 17 18 18 obj-$(CONFIG_SH_ADC) += adc.o 19 19 obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o 20 + obj-$(CONFIG_SH_FPU) += fpu.o 21 + obj-$(CONFIG_SH_FPU_EMU) += fpu.o 20 22 21 23 obj-y += irq/ init.o clock.o hwblk.o
+82
arch/sh/kernel/cpu/fpu.c
··· 1 + #include <linux/sched.h> 2 + #include <asm/processor.h> 3 + #include <asm/fpu.h> 4 + 5 + int init_fpu(struct task_struct *tsk) 6 + { 7 + if (tsk_used_math(tsk)) { 8 + if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current) 9 + unlazy_fpu(tsk, task_pt_regs(tsk)); 10 + return 0; 11 + } 12 + 13 + /* 14 + * Memory allocation at the first usage of the FPU and other state. 15 + */ 16 + if (!tsk->thread.xstate) { 17 + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, 18 + GFP_KERNEL); 19 + if (!tsk->thread.xstate) 20 + return -ENOMEM; 21 + } 22 + 23 + if (boot_cpu_data.flags & CPU_HAS_FPU) { 24 + struct sh_fpu_hard_struct *fp = &tsk->thread.xstate->hardfpu; 25 + memset(fp, 0, xstate_size); 26 + fp->fpscr = FPSCR_INIT; 27 + } else { 28 + struct sh_fpu_soft_struct *fp = &tsk->thread.xstate->softfpu; 29 + memset(fp, 0, xstate_size); 30 + fp->fpscr = FPSCR_INIT; 31 + } 32 + 33 + set_stopped_child_used_math(tsk); 34 + return 0; 35 + } 36 + 37 + #ifdef CONFIG_SH_FPU 38 + void __fpu_state_restore(void) 39 + { 40 + struct task_struct *tsk = current; 41 + 42 + restore_fpu(tsk); 43 + 44 + task_thread_info(tsk)->status |= TS_USEDFPU; 45 + tsk->fpu_counter++; 46 + } 47 + 48 + void fpu_state_restore(struct pt_regs *regs) 49 + { 50 + struct task_struct *tsk = current; 51 + 52 + if (unlikely(!user_mode(regs))) { 53 + printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); 54 + BUG(); 55 + return; 56 + } 57 + 58 + if (!tsk_used_math(tsk)) { 59 + /* 60 + * does a slab alloc which can sleep 61 + */ 62 + if (init_fpu(tsk)) { 63 + /* 64 + * ran out of memory! 65 + */ 66 + do_group_exit(SIGKILL); 67 + return; 68 + } 69 + } 70 + 71 + grab_fpu(regs); 72 + 73 + __fpu_state_restore(); 74 + } 75 + 76 + BUILD_TRAP_HANDLER(fpu_state_restore) 77 + { 78 + TRAP_HANDLER_DECL; 79 + 80 + fpu_state_restore(regs); 81 + } 82 + #endif /* CONFIG_SH_FPU */
+48 -32
arch/sh/kernel/cpu/init.c
··· 28 28 #include <asm/ubc.h> 29 29 #endif 30 30 31 + #ifdef CONFIG_SH_FPU 32 + #define cpu_has_fpu 1 33 + #else 34 + #define cpu_has_fpu 0 35 + #endif 36 + 37 + #ifdef CONFIG_SH_DSP 38 + #define cpu_has_dsp 1 39 + #else 40 + #define cpu_has_dsp 0 41 + #endif 42 + 31 43 /* 32 44 * Generic wrapper for command line arguments to disable on-chip 33 45 * peripherals (nofpu, nodsp, and so forth). 34 46 */ 35 - #define onchip_setup(x) \ 36 - static int x##_disabled __initdata = 0; \ 37 - \ 38 - static int __init x##_setup(char *opts) \ 39 - { \ 40 - x##_disabled = 1; \ 41 - return 1; \ 42 - } \ 47 + #define onchip_setup(x) \ 48 + static int x##_disabled __initdata = !cpu_has_##x; \ 49 + \ 50 + static int __init x##_setup(char *opts) \ 51 + { \ 52 + x##_disabled = 1; \ 53 + return 1; \ 54 + } \ 43 55 __setup("no" __stringify(x), x##_setup); 44 56 45 57 onchip_setup(fpu); ··· 219 207 l2_cache_shape = -1; /* No S-cache */ 220 208 } 221 209 210 + static void __init fpu_init(void) 211 + { 212 + /* Disable the FPU */ 213 + if (fpu_disabled && (current_cpu_data.flags & CPU_HAS_FPU)) { 214 + printk("FPU Disabled\n"); 215 + current_cpu_data.flags &= ~CPU_HAS_FPU; 216 + } 217 + 218 + disable_fpu(); 219 + clear_used_math(); 220 + } 221 + 222 222 #ifdef CONFIG_SH_DSP 223 223 static void __init release_dsp(void) 224 224 { ··· 268 244 if (sr & SR_DSP) 269 245 current_cpu_data.flags |= CPU_HAS_DSP; 270 246 247 + /* Disable the DSP */ 248 + if (dsp_disabled && (current_cpu_data.flags & CPU_HAS_DSP)) { 249 + printk("DSP Disabled\n"); 250 + current_cpu_data.flags &= ~CPU_HAS_DSP; 251 + } 252 + 271 253 /* Now that we've determined the DSP status, clear the DSP bit. 
*/ 272 254 release_dsp(); 273 255 } 256 + #else 257 + static inline void __init dsp_init(void) { } 274 258 #endif /* CONFIG_SH_DSP */ 275 259 276 260 /** ··· 334 302 detect_cache_shape(); 335 303 } 336 304 337 - /* Disable the FPU */ 338 - if (fpu_disabled) { 339 - printk("FPU Disabled\n"); 340 - current_cpu_data.flags &= ~CPU_HAS_FPU; 341 - } 342 - 343 - /* FPU initialization */ 344 - disable_fpu(); 345 - if ((current_cpu_data.flags & CPU_HAS_FPU)) { 346 - current_thread_info()->status &= ~TS_USEDFPU; 347 - clear_used_math(); 348 - } 305 + fpu_init(); 306 + dsp_init(); 349 307 350 308 /* 351 309 * Initialize the per-CPU ASID cache very early, since the ··· 343 321 */ 344 322 current_cpu_data.asid_cache = NO_CONTEXT; 345 323 346 - #ifdef CONFIG_SH_DSP 347 - /* Probe for DSP */ 348 - dsp_init(); 349 - 350 - /* Disable the DSP */ 351 - if (dsp_disabled) { 352 - printk("DSP Disabled\n"); 353 - current_cpu_data.flags &= ~CPU_HAS_DSP; 354 - release_dsp(); 355 - } 356 - #endif 357 - 358 324 speculative_execution_init(); 359 325 expmask_init(); 326 + 327 + /* 328 + * Boot processor to setup the FP and extended state context info. 329 + */ 330 + if (raw_smp_processor_id() == 0) 331 + init_thread_xstate(); 360 332 }
+23 -88
arch/sh/kernel/cpu/sh2a/fpu.c
··· 26 26 /* 27 27 * Save FPU registers onto task structure. 28 28 */ 29 - void 30 - save_fpu(struct task_struct *tsk) 29 + void save_fpu(struct task_struct *tsk) 31 30 { 32 31 unsigned long dummy; 33 32 ··· 51 52 "fmov.s fr0, @-%0\n\t" 52 53 "lds %3, fpscr\n\t" 53 54 : "=r" (dummy) 54 - : "0" ((char *)(&tsk->thread.fpu.hard.status)), 55 + : "0" ((char *)(&tsk->thread.xstate->hardfpu.status)), 55 56 "r" (FPSCR_RCHG), 56 57 "r" (FPSCR_INIT) 57 58 : "memory"); ··· 59 60 disable_fpu(); 60 61 } 61 62 62 - static void 63 - restore_fpu(struct task_struct *tsk) 63 + void restore_fpu(struct task_struct *tsk) 64 64 { 65 65 unsigned long dummy; 66 66 ··· 83 85 "lds.l @%0+, fpscr\n\t" 84 86 "lds.l @%0+, fpul\n\t" 85 87 : "=r" (dummy) 86 - : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG) 88 + : "0" (tsk->thread.xstate), "r" (FPSCR_RCHG) 87 89 : "memory"); 88 - disable_fpu(); 89 - } 90 - 91 - /* 92 - * Load the FPU with signalling NANS. This bit pattern we're using 93 - * has the property that no matter wether considered as single or as 94 - * double precission represents signaling NANS. 
95 - */ 96 - 97 - static void 98 - fpu_init(void) 99 - { 100 - enable_fpu(); 101 - asm volatile("lds %0, fpul\n\t" 102 - "fsts fpul, fr0\n\t" 103 - "fsts fpul, fr1\n\t" 104 - "fsts fpul, fr2\n\t" 105 - "fsts fpul, fr3\n\t" 106 - "fsts fpul, fr4\n\t" 107 - "fsts fpul, fr5\n\t" 108 - "fsts fpul, fr6\n\t" 109 - "fsts fpul, fr7\n\t" 110 - "fsts fpul, fr8\n\t" 111 - "fsts fpul, fr9\n\t" 112 - "fsts fpul, fr10\n\t" 113 - "fsts fpul, fr11\n\t" 114 - "fsts fpul, fr12\n\t" 115 - "fsts fpul, fr13\n\t" 116 - "fsts fpul, fr14\n\t" 117 - "fsts fpul, fr15\n\t" 118 - "lds %2, fpscr\n\t" 119 - : /* no output */ 120 - : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 121 90 disable_fpu(); 122 91 } 123 92 ··· 455 490 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 456 491 struct task_struct *tsk = current; 457 492 458 - if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) { 493 + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_FPU_ERROR)) { 459 494 /* FPU error */ 460 - denormal_to_double (&tsk->thread.fpu.hard, 495 + denormal_to_double (&tsk->thread.xstate->hardfpu, 461 496 (finsn >> 8) & 0xf); 462 497 } else 463 498 return 0; ··· 472 507 473 508 n = (finsn >> 8) & 0xf; 474 509 m = (finsn >> 4) & 0xf; 475 - hx = tsk->thread.fpu.hard.fp_regs[n]; 476 - hy = tsk->thread.fpu.hard.fp_regs[m]; 477 - fpscr = tsk->thread.fpu.hard.fpscr; 510 + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; 511 + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; 512 + fpscr = tsk->thread.xstate->hardfpu.fpscr; 478 513 prec = fpscr & (1 << 19); 479 514 480 515 if ((fpscr & FPSCR_FPU_ERROR) ··· 484 519 485 520 /* FPU error because of denormal */ 486 521 llx = ((long long) hx << 32) 487 - | tsk->thread.fpu.hard.fp_regs[n+1]; 522 + | tsk->thread.xstate->hardfpu.fp_regs[n+1]; 488 523 lly = ((long long) hy << 32) 489 - | tsk->thread.fpu.hard.fp_regs[m+1]; 524 + | tsk->thread.xstate->hardfpu.fp_regs[m+1]; 490 525 if ((hx & 0x7fffffff) >= 0x00100000) 491 526 llx = denormal_muld(lly, llx); 492 527 else 493 528 llx = 
denormal_muld(llx, lly); 494 - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 495 - tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 529 + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; 530 + tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff; 496 531 } else if ((fpscr & FPSCR_FPU_ERROR) 497 532 && (!prec && ((hx & 0x7fffffff) < 0x00800000 498 533 || (hy & 0x7fffffff) < 0x00800000))) { ··· 501 536 hx = denormal_mulf(hy, hx); 502 537 else 503 538 hx = denormal_mulf(hx, hy); 504 - tsk->thread.fpu.hard.fp_regs[n] = hx; 539 + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; 505 540 } else 506 541 return 0; 507 542 ··· 515 550 516 551 n = (finsn >> 8) & 0xf; 517 552 m = (finsn >> 4) & 0xf; 518 - hx = tsk->thread.fpu.hard.fp_regs[n]; 519 - hy = tsk->thread.fpu.hard.fp_regs[m]; 520 - fpscr = tsk->thread.fpu.hard.fpscr; 553 + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; 554 + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; 555 + fpscr = tsk->thread.xstate->hardfpu.fpscr; 521 556 prec = fpscr & (1 << 19); 522 557 523 558 if ((fpscr & FPSCR_FPU_ERROR) ··· 527 562 528 563 /* FPU error because of denormal */ 529 564 llx = ((long long) hx << 32) 530 - | tsk->thread.fpu.hard.fp_regs[n+1]; 565 + | tsk->thread.xstate->hardfpu.fp_regs[n+1]; 531 566 lly = ((long long) hy << 32) 532 - | tsk->thread.fpu.hard.fp_regs[m+1]; 567 + | tsk->thread.xstate->hardfpu.fp_regs[m+1]; 533 568 if ((finsn & 0xf00f) == 0xf000) 534 569 llx = denormal_addd(llx, lly); 535 570 else 536 571 llx = denormal_addd(llx, lly ^ (1LL << 63)); 537 - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 538 - tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff; 572 + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; 573 + tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff; 539 574 } else if ((fpscr & FPSCR_FPU_ERROR) 540 575 && (!prec && ((hx & 0x7fffffff) < 0x00800000 541 576 || (hy & 0x7fffffff) < 0x00800000))) { ··· 544 579 hx = denormal_addf(hx, hy); 545 580 else 546 581 hx = denormal_addf(hx, hy ^ 
0x80000000); 547 - tsk->thread.fpu.hard.fp_regs[n] = hx; 582 + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; 548 583 } else 549 584 return 0; 550 585 ··· 562 597 563 598 __unlazy_fpu(tsk, regs); 564 599 if (ieee_fpe_handler(regs)) { 565 - tsk->thread.fpu.hard.fpscr &= 600 + tsk->thread.xstate->hardfpu.fpscr &= 566 601 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 567 602 grab_fpu(regs); 568 603 restore_fpu(tsk); ··· 571 606 } 572 607 573 608 force_sig(SIGFPE, tsk); 574 - } 575 - 576 - void fpu_state_restore(struct pt_regs *regs) 577 - { 578 - struct task_struct *tsk = current; 579 - 580 - grab_fpu(regs); 581 - if (unlikely(!user_mode(regs))) { 582 - printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); 583 - BUG(); 584 - return; 585 - } 586 - 587 - if (likely(used_math())) { 588 - /* Using the FPU again. */ 589 - restore_fpu(tsk); 590 - } else { 591 - /* First time FPU user. */ 592 - fpu_init(); 593 - set_used_math(); 594 - } 595 - task_thread_info(tsk)->status |= TS_USEDFPU; 596 - tsk->fpu_counter++; 597 - } 598 - 599 - BUILD_TRAP_HANDLER(fpu_state_restore) 600 - { 601 - TRAP_HANDLER_DECL; 602 - 603 - fpu_state_restore(regs); 604 609 }
+39 -120
arch/sh/kernel/cpu/sh4/fpu.c
··· 85 85 "fmov.s fr1, @-%0\n\t" 86 86 "fmov.s fr0, @-%0\n\t" 87 87 "lds %3, fpscr\n\t":"=r" (dummy) 88 - :"0"((char *)(&tsk->thread.fpu.hard.status)), 88 + :"0"((char *)(&tsk->thread.xstate->hardfpu.status)), 89 89 "r"(FPSCR_RCHG), "r"(FPSCR_INIT) 90 90 :"memory"); 91 91 92 92 disable_fpu(); 93 93 } 94 94 95 - static void restore_fpu(struct task_struct *tsk) 95 + void restore_fpu(struct task_struct *tsk) 96 96 { 97 97 unsigned long dummy; 98 98 ··· 135 135 "lds.l @%0+, fpscr\n\t" 136 136 "lds.l @%0+, fpul\n\t" 137 137 :"=r" (dummy) 138 - :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG) 138 + :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG) 139 139 :"memory"); 140 - disable_fpu(); 141 - } 142 - 143 - /* 144 - * Load the FPU with signalling NANS. This bit pattern we're using 145 - * has the property that no matter wether considered as single or as 146 - * double precision represents signaling NANS. 147 - */ 148 - 149 - static void fpu_init(void) 150 - { 151 - enable_fpu(); 152 - asm volatile ( "lds %0, fpul\n\t" 153 - "lds %1, fpscr\n\t" 154 - "fsts fpul, fr0\n\t" 155 - "fsts fpul, fr1\n\t" 156 - "fsts fpul, fr2\n\t" 157 - "fsts fpul, fr3\n\t" 158 - "fsts fpul, fr4\n\t" 159 - "fsts fpul, fr5\n\t" 160 - "fsts fpul, fr6\n\t" 161 - "fsts fpul, fr7\n\t" 162 - "fsts fpul, fr8\n\t" 163 - "fsts fpul, fr9\n\t" 164 - "fsts fpul, fr10\n\t" 165 - "fsts fpul, fr11\n\t" 166 - "fsts fpul, fr12\n\t" 167 - "fsts fpul, fr13\n\t" 168 - "fsts fpul, fr14\n\t" 169 - "fsts fpul, fr15\n\t" 170 - "frchg\n\t" 171 - "fsts fpul, fr0\n\t" 172 - "fsts fpul, fr1\n\t" 173 - "fsts fpul, fr2\n\t" 174 - "fsts fpul, fr3\n\t" 175 - "fsts fpul, fr4\n\t" 176 - "fsts fpul, fr5\n\t" 177 - "fsts fpul, fr6\n\t" 178 - "fsts fpul, fr7\n\t" 179 - "fsts fpul, fr8\n\t" 180 - "fsts fpul, fr9\n\t" 181 - "fsts fpul, fr10\n\t" 182 - "fsts fpul, fr11\n\t" 183 - "fsts fpul, fr12\n\t" 184 - "fsts fpul, fr13\n\t" 185 - "fsts fpul, fr14\n\t" 186 - "fsts fpul, fr15\n\t" 187 - "frchg\n\t" 188 - "lds %2, fpscr\n\t" 189 - : /* no output 
*/ 190 - :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT)); 191 140 disable_fpu(); 192 141 } 193 142 ··· 231 282 /* fcnvsd */ 232 283 struct task_struct *tsk = current; 233 284 234 - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) 285 + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)) 235 286 /* FPU error */ 236 - denormal_to_double(&tsk->thread.fpu.hard, 287 + denormal_to_double(&tsk->thread.xstate->hardfpu, 237 288 (finsn >> 8) & 0xf); 238 289 else 239 290 return 0; ··· 249 300 250 301 n = (finsn >> 8) & 0xf; 251 302 m = (finsn >> 4) & 0xf; 252 - hx = tsk->thread.fpu.hard.fp_regs[n]; 253 - hy = tsk->thread.fpu.hard.fp_regs[m]; 254 - fpscr = tsk->thread.fpu.hard.fpscr; 303 + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; 304 + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; 305 + fpscr = tsk->thread.xstate->hardfpu.fpscr; 255 306 prec = fpscr & FPSCR_DBL_PRECISION; 256 307 257 308 if ((fpscr & FPSCR_CAUSE_ERROR) ··· 261 312 262 313 /* FPU error because of denormal (doubles) */ 263 314 llx = ((long long)hx << 32) 264 - | tsk->thread.fpu.hard.fp_regs[n + 1]; 315 + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; 265 316 lly = ((long long)hy << 32) 266 - | tsk->thread.fpu.hard.fp_regs[m + 1]; 317 + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; 267 318 llx = float64_mul(llx, lly); 268 - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 269 - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 319 + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; 320 + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; 270 321 } else if ((fpscr & FPSCR_CAUSE_ERROR) 271 322 && (!prec && ((hx & 0x7fffffff) < 0x00800000 272 323 || (hy & 0x7fffffff) < 0x00800000))) { 273 324 /* FPU error because of denormal (floats) */ 274 325 hx = float32_mul(hx, hy); 275 - tsk->thread.fpu.hard.fp_regs[n] = hx; 326 + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; 276 327 } else 277 328 return 0; 278 329 ··· 287 338 288 339 n = (finsn >> 8) & 0xf; 289 340 m = (finsn >> 4) & 0xf; 290 - hx = 
tsk->thread.fpu.hard.fp_regs[n]; 291 - hy = tsk->thread.fpu.hard.fp_regs[m]; 292 - fpscr = tsk->thread.fpu.hard.fpscr; 341 + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; 342 + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; 343 + fpscr = tsk->thread.xstate->hardfpu.fpscr; 293 344 prec = fpscr & FPSCR_DBL_PRECISION; 294 345 295 346 if ((fpscr & FPSCR_CAUSE_ERROR) ··· 299 350 300 351 /* FPU error because of denormal (doubles) */ 301 352 llx = ((long long)hx << 32) 302 - | tsk->thread.fpu.hard.fp_regs[n + 1]; 353 + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; 303 354 lly = ((long long)hy << 32) 304 - | tsk->thread.fpu.hard.fp_regs[m + 1]; 355 + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; 305 356 if ((finsn & 0xf00f) == 0xf000) 306 357 llx = float64_add(llx, lly); 307 358 else 308 359 llx = float64_sub(llx, lly); 309 - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 310 - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 360 + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; 361 + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; 311 362 } else if ((fpscr & FPSCR_CAUSE_ERROR) 312 363 && (!prec && ((hx & 0x7fffffff) < 0x00800000 313 364 || (hy & 0x7fffffff) < 0x00800000))) { ··· 316 367 hx = float32_add(hx, hy); 317 368 else 318 369 hx = float32_sub(hx, hy); 319 - tsk->thread.fpu.hard.fp_regs[n] = hx; 370 + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; 320 371 } else 321 372 return 0; 322 373 ··· 331 382 332 383 n = (finsn >> 8) & 0xf; 333 384 m = (finsn >> 4) & 0xf; 334 - hx = tsk->thread.fpu.hard.fp_regs[n]; 335 - hy = tsk->thread.fpu.hard.fp_regs[m]; 336 - fpscr = tsk->thread.fpu.hard.fpscr; 385 + hx = tsk->thread.xstate->hardfpu.fp_regs[n]; 386 + hy = tsk->thread.xstate->hardfpu.fp_regs[m]; 387 + fpscr = tsk->thread.xstate->hardfpu.fpscr; 337 388 prec = fpscr & FPSCR_DBL_PRECISION; 338 389 339 390 if ((fpscr & FPSCR_CAUSE_ERROR) ··· 343 394 344 395 /* FPU error because of denormal (doubles) */ 345 396 llx = ((long long)hx << 32) 346 - | 
tsk->thread.fpu.hard.fp_regs[n + 1]; 397 + | tsk->thread.xstate->hardfpu.fp_regs[n + 1]; 347 398 lly = ((long long)hy << 32) 348 - | tsk->thread.fpu.hard.fp_regs[m + 1]; 399 + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; 349 400 350 401 llx = float64_div(llx, lly); 351 402 352 - tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 353 - tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 403 + tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32; 404 + tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff; 354 405 } else if ((fpscr & FPSCR_CAUSE_ERROR) 355 406 && (!prec && ((hx & 0x7fffffff) < 0x00800000 356 407 || (hy & 0x7fffffff) < 0x00800000))) { 357 408 /* FPU error because of denormal (floats) */ 358 409 hx = float32_div(hx, hy); 359 - tsk->thread.fpu.hard.fp_regs[n] = hx; 410 + tsk->thread.xstate->hardfpu.fp_regs[n] = hx; 360 411 } else 361 412 return 0; 362 413 ··· 369 420 unsigned int hx; 370 421 371 422 m = (finsn >> 8) & 0x7; 372 - hx = tsk->thread.fpu.hard.fp_regs[m]; 423 + hx = tsk->thread.xstate->hardfpu.fp_regs[m]; 373 424 374 - if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR) 425 + if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR) 375 426 && ((hx & 0x7fffffff) < 0x00100000)) { 376 427 /* subnormal double to float conversion */ 377 428 long long llx; 378 429 379 - llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32) 380 - | tsk->thread.fpu.hard.fp_regs[m + 1]; 430 + llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32) 431 + | tsk->thread.xstate->hardfpu.fp_regs[m + 1]; 381 432 382 - tsk->thread.fpu.hard.fpul = float64_to_float32(llx); 433 + tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx); 383 434 } else 384 435 return 0; 385 436 ··· 398 449 int float_rounding_mode(void) 399 450 { 400 451 struct task_struct *tsk = current; 401 - int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr); 452 + int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr); 402 453 return roundingMode; 403 
454 } 404 455 ··· 410 461 __unlazy_fpu(tsk, regs); 411 462 fpu_exception_flags = 0; 412 463 if (ieee_fpe_handler(regs)) { 413 - tsk->thread.fpu.hard.fpscr &= 464 + tsk->thread.xstate->hardfpu.fpscr &= 414 465 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 415 - tsk->thread.fpu.hard.fpscr |= fpu_exception_flags; 466 + tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags; 416 467 /* Set the FPSCR flag as well as cause bits - simply 417 468 * replicate the cause */ 418 - tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); 469 + tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10); 419 470 grab_fpu(regs); 420 471 restore_fpu(tsk); 421 472 task_thread_info(tsk)->status |= TS_USEDFPU; 422 - if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & 473 + if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) & 423 474 (fpu_exception_flags >> 2)) == 0) { 424 475 return; 425 476 } 426 477 } 427 478 428 479 force_sig(SIGFPE, tsk); 429 - } 430 - 431 - void fpu_state_restore(struct pt_regs *regs) 432 - { 433 - struct task_struct *tsk = current; 434 - 435 - grab_fpu(regs); 436 - if (unlikely(!user_mode(regs))) { 437 - printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); 438 - BUG(); 439 - return; 440 - } 441 - 442 - if (likely(used_math())) { 443 - /* Using the FPU again. */ 444 - restore_fpu(tsk); 445 - } else { 446 - /* First time FPU user. */ 447 - fpu_init(); 448 - set_used_math(); 449 - } 450 - task_thread_info(tsk)->status |= TS_USEDFPU; 451 - tsk->fpu_counter++; 452 - } 453 - 454 - BUILD_TRAP_HANDLER(fpu_state_restore) 455 - { 456 - TRAP_HANDLER_DECL; 457 - 458 - fpu_state_restore(regs); 459 480 }
+54
arch/sh/kernel/process.c
··· 2 2 #include <linux/kernel.h> 3 3 #include <linux/sched.h> 4 4 5 + struct kmem_cache *task_xstate_cachep = NULL; 6 + unsigned int xstate_size; 7 + 8 + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 9 + { 10 + *dst = *src; 11 + 12 + if (src->thread.xstate) { 13 + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, 14 + GFP_KERNEL); 15 + if (!dst->thread.xstate) 16 + return -ENOMEM; 17 + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); 18 + } 19 + 20 + return 0; 21 + } 22 + 23 + void free_thread_xstate(struct task_struct *tsk) 24 + { 25 + if (tsk->thread.xstate) { 26 + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 27 + tsk->thread.xstate = NULL; 28 + } 29 + } 30 + 5 31 #if THREAD_SHIFT < PAGE_SHIFT 6 32 static struct kmem_cache *thread_info_cache; 7 33 ··· 46 20 47 21 void free_thread_info(struct thread_info *ti) 48 22 { 23 + free_thread_xstate(ti->task); 49 24 kmem_cache_free(thread_info_cache, ti); 50 25 } 51 26 ··· 68 41 69 42 void free_thread_info(struct thread_info *ti) 70 43 { 44 + free_thread_xstate(ti->task); 71 45 free_pages((unsigned long)ti, THREAD_SIZE_ORDER); 72 46 } 73 47 #endif /* THREAD_SHIFT < PAGE_SHIFT */ 48 + 49 + void arch_task_cache_init(void) 50 + { 51 + if (!xstate_size) 52 + return; 53 + 54 + task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, 55 + __alignof__(union thread_xstate), 56 + SLAB_PANIC | SLAB_NOTRACK, NULL); 57 + } 58 + 59 + #ifdef CONFIG_SH_FPU_EMU 60 + # define HAVE_SOFTFP 1 61 + #else 62 + # define HAVE_SOFTFP 0 63 + #endif 64 + 65 + void init_thread_xstate(void) 66 + { 67 + if (boot_cpu_data.flags & CPU_HAS_FPU) 68 + xstate_size = sizeof(struct sh_fpu_hard_struct); 69 + else if (HAVE_SOFTFP) 70 + xstate_size = sizeof(struct sh_fpu_soft_struct); 71 + else 72 + xstate_size = 0; 73 + }
+4 -2
arch/sh/kernel/process_32.c
··· 156 156 regs->sr = SR_FD; 157 157 regs->pc = new_pc; 158 158 regs->regs[15] = new_sp; 159 + 160 + free_thread_xstate(current); 159 161 } 160 162 EXPORT_SYMBOL(start_thread); 161 163 ··· 318 316 319 317 /* we're going to use this soon, after a few expensive things */ 320 318 if (next->fpu_counter > 5) 321 - prefetch(&next_t->fpu.hard); 319 + prefetch(next_t->xstate); 322 320 323 321 #ifdef CONFIG_MMU 324 322 /* ··· 355 353 * chances of needing FPU soon are obviously high now 356 354 */ 357 355 if (next->fpu_counter > 5) 358 - fpu_state_restore(task_pt_regs(next)); 356 + __fpu_state_restore(); 359 357 360 358 return prev; 361 359 }
+6 -6
arch/sh/kernel/ptrace_32.c
··· 163 163 164 164 if ((boot_cpu_data.flags & CPU_HAS_FPU)) 165 165 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 166 - &target->thread.fpu.hard, 0, -1); 166 + &target->thread.xstate->hardfpu, 0, -1); 167 167 168 168 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 169 - &target->thread.fpu.soft, 0, -1); 169 + &target->thread.xstate->softfpu, 0, -1); 170 170 } 171 171 172 172 static int fpregs_set(struct task_struct *target, ··· 184 184 185 185 if ((boot_cpu_data.flags & CPU_HAS_FPU)) 186 186 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 187 - &target->thread.fpu.hard, 0, -1); 187 + &target->thread.xstate->hardfpu, 0, -1); 188 188 189 189 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 190 - &target->thread.fpu.soft, 0, -1); 190 + &target->thread.xstate->softfpu, 0, -1); 191 191 } 192 192 193 193 static int fpregs_active(struct task_struct *target, ··· 333 333 else 334 334 tmp = 0; 335 335 } else 336 - tmp = ((long *)&child->thread.fpu) 336 + tmp = ((long *)child->thread.xstate) 337 337 [(addr - (long)&dummy->fpu) >> 2]; 338 338 } else if (addr == (long) &dummy->u_fpvalid) 339 339 tmp = !!tsk_used_math(child); ··· 362 362 else if (addr >= (long) &dummy->fpu && 363 363 addr < (long) &dummy->u_fpvalid) { 364 364 set_stopped_child_used_math(child); 365 - ((long *)&child->thread.fpu) 365 + ((long *)child->thread.xstate) 366 366 [(addr - (long)&dummy->fpu) >> 2] = data; 367 367 ret = 0; 368 368 } else if (addr == (long) &dummy->u_fpvalid) {
+2 -2
arch/sh/kernel/signal_32.c
··· 150 150 return 0; 151 151 152 152 set_used_math(); 153 - return __copy_from_user(&tsk->thread.fpu.hard, &sc->sc_fpregs[0], 153 + return __copy_from_user(&tsk->thread.xstate->hardfpu, &sc->sc_fpregs[0], 154 154 sizeof(long)*(16*2+2)); 155 155 } 156 156 ··· 175 175 clear_used_math(); 176 176 177 177 unlazy_fpu(tsk, regs); 178 - return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard, 178 + return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.xstate->hardfpu, 179 179 sizeof(long)*(16*2+2)); 180 180 } 181 181 #endif /* CONFIG_SH_FPU */
+6 -6
arch/sh/math-emu/math.c
··· 471 471 * denormal_to_double - Given denormalized float number, 472 472 * store double float 473 473 * 474 - * @fpu: Pointer to sh_fpu_hard structure 474 + * @fpu: Pointer to sh_fpu_soft structure 475 475 * @n: Index to FP register 476 476 */ 477 - static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n) 477 + static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) 478 478 { 479 479 unsigned long du, dl; 480 480 unsigned long x = fpu->fpul; ··· 552 552 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 553 553 struct task_struct *tsk = current; 554 554 555 - if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { 555 + if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { 556 556 /* FPU error */ 557 - denormal_to_double (&tsk->thread.fpu.hard, 557 + denormal_to_double (&tsk->thread.xstate->softfpu, 558 558 (finsn >> 8) & 0xf); 559 - tsk->thread.fpu.hard.fpscr &= 559 + tsk->thread.xstate->softfpu.fpscr &= 560 560 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 561 561 task_thread_info(tsk)->status |= TS_USEDFPU; 562 562 } else { ··· 617 617 int do_fpu_inst(unsigned short inst, struct pt_regs *regs) 618 618 { 619 619 struct task_struct *tsk = current; 620 - struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft); 620 + struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu); 621 621 622 622 if (!(task_thread_info(tsk)->status & TS_USEDFPU)) { 623 623 /* initialize once. */