Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: Support denormalization on SH-4 FPU.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

authored by

Stuart Menefy and committed by
Paul Mundt
c8c0a1ab 453ec9c1

+1258 -180
+1 -1
arch/sh/kernel/cpu/sh4/Makefile
··· 5 5 obj-y := probe.o common.o 6 6 common-y += $(addprefix ../sh3/, entry.o ex.o) 7 7 8 - obj-$(CONFIG_SH_FPU) += fpu.o 8 + obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o 9 9 obj-$(CONFIG_SH_STORE_QUEUES) += sq.o 10 10 11 11 # CPU subtype setup
+333 -179
arch/sh/kernel/cpu/sh4/fpu.c
··· 1 - /* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $ 2 - * 3 - * linux/arch/sh/kernel/fpu.c 4 - * 1 + /* 5 2 * Save/restore floating point context for signal handlers. 6 3 * 7 4 * This file is subject to the terms and conditions of the GNU General Public ··· 6 9 * for more details. 7 10 * 8 11 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka 12 + * Copyright (C) 2006 ST Microelectronics Ltd. (denorm support) 9 13 * 10 - * FIXME! These routines can be optimized in big endian case. 14 + * FIXME! These routines have not been tested for big endian case. 11 15 */ 12 - 13 16 #include <linux/sched.h> 14 17 #include <linux/signal.h> 18 + #include <linux/io.h> 19 + #include <asm/cpu/fpu.h> 15 20 #include <asm/processor.h> 16 21 #include <asm/system.h> 17 - #include <asm/io.h> 18 22 19 23 /* The PR (precision) bit in the FP Status Register must be clear when 20 24 * an frchg instruction is executed, otherwise the instruction is undefined. ··· 23 25 */ 24 26 25 27 #define FPSCR_RCHG 0x00000000 28 + extern unsigned long long float64_div(unsigned long long a, 29 + unsigned long long b); 30 + extern unsigned long int float32_div(unsigned long int a, unsigned long int b); 31 + extern unsigned long long float64_mul(unsigned long long a, 32 + unsigned long long b); 33 + extern unsigned long int float32_mul(unsigned long int a, unsigned long int b); 34 + extern unsigned long long float64_add(unsigned long long a, 35 + unsigned long long b); 36 + extern unsigned long int float32_add(unsigned long int a, unsigned long int b); 37 + extern unsigned long long float64_sub(unsigned long long a, 38 + unsigned long long b); 39 + extern unsigned long int float32_sub(unsigned long int a, unsigned long int b); 26 40 41 + static unsigned int fpu_exception_flags; 27 42 28 43 /* 29 44 * Save FPU registers onto task structure. 30 45 * Assume called with FPU enabled (SR.FD=0). 
31 46 */ 32 - void 33 - save_fpu(struct task_struct *tsk, struct pt_regs *regs) 47 + void save_fpu(struct task_struct *tsk, struct pt_regs *regs) 34 48 { 35 49 unsigned long dummy; 36 50 37 51 clear_tsk_thread_flag(tsk, TIF_USEDFPU); 38 52 enable_fpu(); 39 - asm volatile("sts.l fpul, @-%0\n\t" 40 - "sts.l fpscr, @-%0\n\t" 41 - "lds %2, fpscr\n\t" 42 - "frchg\n\t" 43 - "fmov.s fr15, @-%0\n\t" 44 - "fmov.s fr14, @-%0\n\t" 45 - "fmov.s fr13, @-%0\n\t" 46 - "fmov.s fr12, @-%0\n\t" 47 - "fmov.s fr11, @-%0\n\t" 48 - "fmov.s fr10, @-%0\n\t" 49 - "fmov.s fr9, @-%0\n\t" 50 - "fmov.s fr8, @-%0\n\t" 51 - "fmov.s fr7, @-%0\n\t" 52 - "fmov.s fr6, @-%0\n\t" 53 - "fmov.s fr5, @-%0\n\t" 54 - "fmov.s fr4, @-%0\n\t" 55 - "fmov.s fr3, @-%0\n\t" 56 - "fmov.s fr2, @-%0\n\t" 57 - "fmov.s fr1, @-%0\n\t" 58 - "fmov.s fr0, @-%0\n\t" 59 - "frchg\n\t" 60 - "fmov.s fr15, @-%0\n\t" 61 - "fmov.s fr14, @-%0\n\t" 62 - "fmov.s fr13, @-%0\n\t" 63 - "fmov.s fr12, @-%0\n\t" 64 - "fmov.s fr11, @-%0\n\t" 65 - "fmov.s fr10, @-%0\n\t" 66 - "fmov.s fr9, @-%0\n\t" 67 - "fmov.s fr8, @-%0\n\t" 68 - "fmov.s fr7, @-%0\n\t" 69 - "fmov.s fr6, @-%0\n\t" 70 - "fmov.s fr5, @-%0\n\t" 71 - "fmov.s fr4, @-%0\n\t" 72 - "fmov.s fr3, @-%0\n\t" 73 - "fmov.s fr2, @-%0\n\t" 74 - "fmov.s fr1, @-%0\n\t" 75 - "fmov.s fr0, @-%0\n\t" 76 - "lds %3, fpscr\n\t" 77 - : "=r" (dummy) 78 - : "0" ((char *)(&tsk->thread.fpu.hard.status)), 79 - "r" (FPSCR_RCHG), 80 - "r" (FPSCR_INIT) 81 - : "memory"); 53 + asm volatile ("sts.l fpul, @-%0\n\t" 54 + "sts.l fpscr, @-%0\n\t" 55 + "lds %2, fpscr\n\t" 56 + "frchg\n\t" 57 + "fmov.s fr15, @-%0\n\t" 58 + "fmov.s fr14, @-%0\n\t" 59 + "fmov.s fr13, @-%0\n\t" 60 + "fmov.s fr12, @-%0\n\t" 61 + "fmov.s fr11, @-%0\n\t" 62 + "fmov.s fr10, @-%0\n\t" 63 + "fmov.s fr9, @-%0\n\t" 64 + "fmov.s fr8, @-%0\n\t" 65 + "fmov.s fr7, @-%0\n\t" 66 + "fmov.s fr6, @-%0\n\t" 67 + "fmov.s fr5, @-%0\n\t" 68 + "fmov.s fr4, @-%0\n\t" 69 + "fmov.s fr3, @-%0\n\t" 70 + "fmov.s fr2, @-%0\n\t" 71 + "fmov.s fr1, @-%0\n\t" 72 + 
"fmov.s fr0, @-%0\n\t" 73 + "frchg\n\t" 74 + "fmov.s fr15, @-%0\n\t" 75 + "fmov.s fr14, @-%0\n\t" 76 + "fmov.s fr13, @-%0\n\t" 77 + "fmov.s fr12, @-%0\n\t" 78 + "fmov.s fr11, @-%0\n\t" 79 + "fmov.s fr10, @-%0\n\t" 80 + "fmov.s fr9, @-%0\n\t" 81 + "fmov.s fr8, @-%0\n\t" 82 + "fmov.s fr7, @-%0\n\t" 83 + "fmov.s fr6, @-%0\n\t" 84 + "fmov.s fr5, @-%0\n\t" 85 + "fmov.s fr4, @-%0\n\t" 86 + "fmov.s fr3, @-%0\n\t" 87 + "fmov.s fr2, @-%0\n\t" 88 + "fmov.s fr1, @-%0\n\t" 89 + "fmov.s fr0, @-%0\n\t" 90 + "lds %3, fpscr\n\t":"=r" (dummy) 91 + :"0"((char *)(&tsk->thread.fpu.hard.status)), 92 + "r"(FPSCR_RCHG), "r"(FPSCR_INIT) 93 + :"memory"); 82 94 83 95 disable_fpu(); 84 96 release_fpu(regs); 85 97 } 86 98 87 - static void 88 - restore_fpu(struct task_struct *tsk) 99 + static void restore_fpu(struct task_struct *tsk) 89 100 { 90 101 unsigned long dummy; 91 102 92 103 enable_fpu(); 93 - asm volatile("lds %2, fpscr\n\t" 94 - "fmov.s @%0+, fr0\n\t" 95 - "fmov.s @%0+, fr1\n\t" 96 - "fmov.s @%0+, fr2\n\t" 97 - "fmov.s @%0+, fr3\n\t" 98 - "fmov.s @%0+, fr4\n\t" 99 - "fmov.s @%0+, fr5\n\t" 100 - "fmov.s @%0+, fr6\n\t" 101 - "fmov.s @%0+, fr7\n\t" 102 - "fmov.s @%0+, fr8\n\t" 103 - "fmov.s @%0+, fr9\n\t" 104 - "fmov.s @%0+, fr10\n\t" 105 - "fmov.s @%0+, fr11\n\t" 106 - "fmov.s @%0+, fr12\n\t" 107 - "fmov.s @%0+, fr13\n\t" 108 - "fmov.s @%0+, fr14\n\t" 109 - "fmov.s @%0+, fr15\n\t" 110 - "frchg\n\t" 111 - "fmov.s @%0+, fr0\n\t" 112 - "fmov.s @%0+, fr1\n\t" 113 - "fmov.s @%0+, fr2\n\t" 114 - "fmov.s @%0+, fr3\n\t" 115 - "fmov.s @%0+, fr4\n\t" 116 - "fmov.s @%0+, fr5\n\t" 117 - "fmov.s @%0+, fr6\n\t" 118 - "fmov.s @%0+, fr7\n\t" 119 - "fmov.s @%0+, fr8\n\t" 120 - "fmov.s @%0+, fr9\n\t" 121 - "fmov.s @%0+, fr10\n\t" 122 - "fmov.s @%0+, fr11\n\t" 123 - "fmov.s @%0+, fr12\n\t" 124 - "fmov.s @%0+, fr13\n\t" 125 - "fmov.s @%0+, fr14\n\t" 126 - "fmov.s @%0+, fr15\n\t" 127 - "frchg\n\t" 128 - "lds.l @%0+, fpscr\n\t" 129 - "lds.l @%0+, fpul\n\t" 130 - : "=r" (dummy) 131 - : "0" 
(&tsk->thread.fpu), "r" (FPSCR_RCHG) 132 - : "memory"); 104 + asm volatile ("lds %2, fpscr\n\t" 105 + "fmov.s @%0+, fr0\n\t" 106 + "fmov.s @%0+, fr1\n\t" 107 + "fmov.s @%0+, fr2\n\t" 108 + "fmov.s @%0+, fr3\n\t" 109 + "fmov.s @%0+, fr4\n\t" 110 + "fmov.s @%0+, fr5\n\t" 111 + "fmov.s @%0+, fr6\n\t" 112 + "fmov.s @%0+, fr7\n\t" 113 + "fmov.s @%0+, fr8\n\t" 114 + "fmov.s @%0+, fr9\n\t" 115 + "fmov.s @%0+, fr10\n\t" 116 + "fmov.s @%0+, fr11\n\t" 117 + "fmov.s @%0+, fr12\n\t" 118 + "fmov.s @%0+, fr13\n\t" 119 + "fmov.s @%0+, fr14\n\t" 120 + "fmov.s @%0+, fr15\n\t" 121 + "frchg\n\t" 122 + "fmov.s @%0+, fr0\n\t" 123 + "fmov.s @%0+, fr1\n\t" 124 + "fmov.s @%0+, fr2\n\t" 125 + "fmov.s @%0+, fr3\n\t" 126 + "fmov.s @%0+, fr4\n\t" 127 + "fmov.s @%0+, fr5\n\t" 128 + "fmov.s @%0+, fr6\n\t" 129 + "fmov.s @%0+, fr7\n\t" 130 + "fmov.s @%0+, fr8\n\t" 131 + "fmov.s @%0+, fr9\n\t" 132 + "fmov.s @%0+, fr10\n\t" 133 + "fmov.s @%0+, fr11\n\t" 134 + "fmov.s @%0+, fr12\n\t" 135 + "fmov.s @%0+, fr13\n\t" 136 + "fmov.s @%0+, fr14\n\t" 137 + "fmov.s @%0+, fr15\n\t" 138 + "frchg\n\t" 139 + "lds.l @%0+, fpscr\n\t" 140 + "lds.l @%0+, fpul\n\t" 141 + :"=r" (dummy) 142 + :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG) 143 + :"memory"); 133 144 disable_fpu(); 134 145 } 135 146 ··· 148 141 * double precision represents signaling NANS. 
149 142 */ 150 143 151 - static void 152 - fpu_init(void) 144 + static void fpu_init(void) 153 145 { 154 146 enable_fpu(); 155 - asm volatile("lds %0, fpul\n\t" 156 - "lds %1, fpscr\n\t" 157 - "fsts fpul, fr0\n\t" 158 - "fsts fpul, fr1\n\t" 159 - "fsts fpul, fr2\n\t" 160 - "fsts fpul, fr3\n\t" 161 - "fsts fpul, fr4\n\t" 162 - "fsts fpul, fr5\n\t" 163 - "fsts fpul, fr6\n\t" 164 - "fsts fpul, fr7\n\t" 165 - "fsts fpul, fr8\n\t" 166 - "fsts fpul, fr9\n\t" 167 - "fsts fpul, fr10\n\t" 168 - "fsts fpul, fr11\n\t" 169 - "fsts fpul, fr12\n\t" 170 - "fsts fpul, fr13\n\t" 171 - "fsts fpul, fr14\n\t" 172 - "fsts fpul, fr15\n\t" 173 - "frchg\n\t" 174 - "fsts fpul, fr0\n\t" 175 - "fsts fpul, fr1\n\t" 176 - "fsts fpul, fr2\n\t" 177 - "fsts fpul, fr3\n\t" 178 - "fsts fpul, fr4\n\t" 179 - "fsts fpul, fr5\n\t" 180 - "fsts fpul, fr6\n\t" 181 - "fsts fpul, fr7\n\t" 182 - "fsts fpul, fr8\n\t" 183 - "fsts fpul, fr9\n\t" 184 - "fsts fpul, fr10\n\t" 185 - "fsts fpul, fr11\n\t" 186 - "fsts fpul, fr12\n\t" 187 - "fsts fpul, fr13\n\t" 188 - "fsts fpul, fr14\n\t" 189 - "fsts fpul, fr15\n\t" 190 - "frchg\n\t" 191 - "lds %2, fpscr\n\t" 192 - : /* no output */ 193 - : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT)); 147 + asm volatile ( "lds %0, fpul\n\t" 148 + "lds %1, fpscr\n\t" 149 + "fsts fpul, fr0\n\t" 150 + "fsts fpul, fr1\n\t" 151 + "fsts fpul, fr2\n\t" 152 + "fsts fpul, fr3\n\t" 153 + "fsts fpul, fr4\n\t" 154 + "fsts fpul, fr5\n\t" 155 + "fsts fpul, fr6\n\t" 156 + "fsts fpul, fr7\n\t" 157 + "fsts fpul, fr8\n\t" 158 + "fsts fpul, fr9\n\t" 159 + "fsts fpul, fr10\n\t" 160 + "fsts fpul, fr11\n\t" 161 + "fsts fpul, fr12\n\t" 162 + "fsts fpul, fr13\n\t" 163 + "fsts fpul, fr14\n\t" 164 + "fsts fpul, fr15\n\t" 165 + "frchg\n\t" 166 + "fsts fpul, fr0\n\t" 167 + "fsts fpul, fr1\n\t" 168 + "fsts fpul, fr2\n\t" 169 + "fsts fpul, fr3\n\t" 170 + "fsts fpul, fr4\n\t" 171 + "fsts fpul, fr5\n\t" 172 + "fsts fpul, fr6\n\t" 173 + "fsts fpul, fr7\n\t" 174 + "fsts fpul, fr8\n\t" 175 + "fsts fpul, fr9\n\t" 176 
+ "fsts fpul, fr10\n\t" 177 + "fsts fpul, fr11\n\t" 178 + "fsts fpul, fr12\n\t" 179 + "fsts fpul, fr13\n\t" 180 + "fsts fpul, fr14\n\t" 181 + "fsts fpul, fr15\n\t" 182 + "frchg\n\t" 183 + "lds %2, fpscr\n\t" 184 + : /* no output */ 185 + :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT)); 194 186 disable_fpu(); 195 187 } 196 188 197 189 /** 198 - * denormal_to_double - Given denormalized float number, 199 - * store double float 190 + * denormal_to_double - Given denormalized float number, 191 + * store double float 200 192 * 201 - * @fpu: Pointer to sh_fpu_hard structure 202 - * @n: Index to FP register 193 + * @fpu: Pointer to sh_fpu_hard structure 194 + * @n: Index to FP register 203 195 */ 204 - static void 205 - denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) 196 + static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n) 206 197 { 207 198 unsigned long du, dl; 208 199 unsigned long x = fpu->fpul; ··· 217 212 dl = x << 29; 218 213 219 214 fpu->fp_regs[n] = du; 220 - fpu->fp_regs[n+1] = dl; 215 + fpu->fp_regs[n + 1] = dl; 221 216 } 222 217 } 223 218 ··· 228 223 * 229 224 * Returns 1 when it's handled (should not cause exception). 
230 225 */ 231 - static int 232 - ieee_fpe_handler (struct pt_regs *regs) 226 + static int ieee_fpe_handler(struct pt_regs *regs) 233 227 { 234 - unsigned short insn = *(unsigned short *) regs->pc; 228 + unsigned short insn = *(unsigned short *)regs->pc; 235 229 unsigned short finsn; 236 230 unsigned long nextpc; 237 231 int nib[4] = { 238 232 (insn >> 12) & 0xf, 239 233 (insn >> 8) & 0xf, 240 234 (insn >> 4) & 0xf, 241 - insn & 0xf}; 235 + insn & 0xf 236 + }; 242 237 243 - if (nib[0] == 0xb || 244 - (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ 245 - regs->pr = regs->pc + 4; 246 - if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ 247 - nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); 248 - finsn = *(unsigned short *) (regs->pc + 2); 249 - } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ 238 + if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) 239 + regs->pr = regs->pc + 4; /* bsr & jsr */ 240 + 241 + if (nib[0] == 0xa || nib[0] == 0xb) { 242 + /* bra & bsr */ 243 + nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3); 244 + finsn = *(unsigned short *)(regs->pc + 2); 245 + } else if (nib[0] == 0x8 && nib[1] == 0xd) { 246 + /* bt/s */ 250 247 if (regs->sr & 1) 251 - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 248 + nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1); 252 249 else 253 250 nextpc = regs->pc + 4; 254 - finsn = *(unsigned short *) (regs->pc + 2); 255 - } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ 251 + finsn = *(unsigned short *)(regs->pc + 2); 252 + } else if (nib[0] == 0x8 && nib[1] == 0xf) { 253 + /* bf/s */ 256 254 if (regs->sr & 1) 257 255 nextpc = regs->pc + 4; 258 256 else 259 - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); 260 - finsn = *(unsigned short *) (regs->pc + 2); 257 + nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1); 258 + finsn = *(unsigned short *)(regs->pc + 2); 261 259 } else if (nib[0] == 0x4 && nib[3] == 0xb && 262 - (nib[2] == 
0x0 || nib[2] == 0x2)) { /* jmp & jsr */ 260 + (nib[2] == 0x0 || nib[2] == 0x2)) { 261 + /* jmp & jsr */ 263 262 nextpc = regs->regs[nib[1]]; 264 - finsn = *(unsigned short *) (regs->pc + 2); 263 + finsn = *(unsigned short *)(regs->pc + 2); 265 264 } else if (nib[0] == 0x0 && nib[3] == 0x3 && 266 - (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ 265 + (nib[2] == 0x0 || nib[2] == 0x2)) { 266 + /* braf & bsrf */ 267 267 nextpc = regs->pc + 4 + regs->regs[nib[1]]; 268 - finsn = *(unsigned short *) (regs->pc + 2); 269 - } else if (insn == 0x000b) { /* rts */ 268 + finsn = *(unsigned short *)(regs->pc + 2); 269 + } else if (insn == 0x000b) { 270 + /* rts */ 270 271 nextpc = regs->pr; 271 - finsn = *(unsigned short *) (regs->pc + 2); 272 + finsn = *(unsigned short *)(regs->pc + 2); 272 273 } else { 273 274 nextpc = regs->pc + instruction_size(insn); 274 275 finsn = insn; 275 276 } 276 277 277 - if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ 278 + if ((finsn & 0xf1ff) == 0xf0ad) { 279 + /* fcnvsd */ 278 280 struct task_struct *tsk = current; 279 281 280 282 save_fpu(tsk, regs); 281 - if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { 283 + if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) 282 284 /* FPU error */ 283 - denormal_to_double (&tsk->thread.fpu.hard, 284 - (finsn >> 8) & 0xf); 285 - tsk->thread.fpu.hard.fpscr &= 286 - ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 287 - grab_fpu(regs); 288 - restore_fpu(tsk); 289 - set_tsk_thread_flag(tsk, TIF_USEDFPU); 285 + denormal_to_double(&tsk->thread.fpu.hard, 286 + (finsn >> 8) & 0xf); 287 + else 288 + return 0; 289 + 290 + regs->pc = nextpc; 291 + return 1; 292 + } else if ((finsn & 0xf00f) == 0xf002) { 293 + /* fmul */ 294 + struct task_struct *tsk = current; 295 + int fpscr; 296 + int n, m, prec; 297 + unsigned int hx, hy; 298 + 299 + n = (finsn >> 8) & 0xf; 300 + m = (finsn >> 4) & 0xf; 301 + hx = tsk->thread.fpu.hard.fp_regs[n]; 302 + hy = tsk->thread.fpu.hard.fp_regs[m]; 303 + fpscr = tsk->thread.fpu.hard.fpscr; 304 
+ prec = fpscr & FPSCR_DBL_PRECISION; 305 + 306 + if ((fpscr & FPSCR_CAUSE_ERROR) 307 + && (prec && ((hx & 0x7fffffff) < 0x00100000 308 + || (hy & 0x7fffffff) < 0x00100000))) { 309 + long long llx, lly; 310 + 311 + /* FPU error because of denormal (doubles) */ 312 + llx = ((long long)hx << 32) 313 + | tsk->thread.fpu.hard.fp_regs[n + 1]; 314 + lly = ((long long)hy << 32) 315 + | tsk->thread.fpu.hard.fp_regs[m + 1]; 316 + llx = float64_mul(llx, lly); 317 + tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 318 + tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 319 + } else if ((fpscr & FPSCR_CAUSE_ERROR) 320 + && (!prec && ((hx & 0x7fffffff) < 0x00800000 321 + || (hy & 0x7fffffff) < 0x00800000))) { 322 + /* FPU error because of denormal (floats) */ 323 + hx = float32_mul(hx, hy); 324 + tsk->thread.fpu.hard.fp_regs[n] = hx; 290 325 } else 291 - force_sig(SIGFPE, tsk); 326 + return 0; 327 + 328 + regs->pc = nextpc; 329 + return 1; 330 + } else if ((finsn & 0xf00e) == 0xf000) { 331 + /* fadd, fsub */ 332 + struct task_struct *tsk = current; 333 + int fpscr; 334 + int n, m, prec; 335 + unsigned int hx, hy; 336 + 337 + n = (finsn >> 8) & 0xf; 338 + m = (finsn >> 4) & 0xf; 339 + hx = tsk->thread.fpu.hard.fp_regs[n]; 340 + hy = tsk->thread.fpu.hard.fp_regs[m]; 341 + fpscr = tsk->thread.fpu.hard.fpscr; 342 + prec = fpscr & FPSCR_DBL_PRECISION; 343 + 344 + if ((fpscr & FPSCR_CAUSE_ERROR) 345 + && (prec && ((hx & 0x7fffffff) < 0x00100000 346 + || (hy & 0x7fffffff) < 0x00100000))) { 347 + long long llx, lly; 348 + 349 + /* FPU error because of denormal (doubles) */ 350 + llx = ((long long)hx << 32) 351 + | tsk->thread.fpu.hard.fp_regs[n + 1]; 352 + lly = ((long long)hy << 32) 353 + | tsk->thread.fpu.hard.fp_regs[m + 1]; 354 + if ((finsn & 0xf00f) == 0xf000) 355 + llx = float64_add(llx, lly); 356 + else 357 + llx = float64_sub(llx, lly); 358 + tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 359 + tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 360 + } else if ((fpscr & 
FPSCR_CAUSE_ERROR) 361 + && (!prec && ((hx & 0x7fffffff) < 0x00800000 362 + || (hy & 0x7fffffff) < 0x00800000))) { 363 + /* FPU error because of denormal (floats) */ 364 + if ((finsn & 0xf00f) == 0xf000) 365 + hx = float32_add(hx, hy); 366 + else 367 + hx = float32_sub(hx, hy); 368 + tsk->thread.fpu.hard.fp_regs[n] = hx; 369 + } else 370 + return 0; 371 + 372 + regs->pc = nextpc; 373 + return 1; 374 + } else if ((finsn & 0xf003) == 0xf003) { 375 + /* fdiv */ 376 + struct task_struct *tsk = current; 377 + int fpscr; 378 + int n, m, prec; 379 + unsigned int hx, hy; 380 + 381 + n = (finsn >> 8) & 0xf; 382 + m = (finsn >> 4) & 0xf; 383 + hx = tsk->thread.fpu.hard.fp_regs[n]; 384 + hy = tsk->thread.fpu.hard.fp_regs[m]; 385 + fpscr = tsk->thread.fpu.hard.fpscr; 386 + prec = fpscr & FPSCR_DBL_PRECISION; 387 + 388 + if ((fpscr & FPSCR_CAUSE_ERROR) 389 + && (prec && ((hx & 0x7fffffff) < 0x00100000 390 + || (hy & 0x7fffffff) < 0x00100000))) { 391 + long long llx, lly; 392 + 393 + /* FPU error because of denormal (doubles) */ 394 + llx = ((long long)hx << 32) 395 + | tsk->thread.fpu.hard.fp_regs[n + 1]; 396 + lly = ((long long)hy << 32) 397 + | tsk->thread.fpu.hard.fp_regs[m + 1]; 398 + 399 + llx = float64_div(llx, lly); 400 + 401 + tsk->thread.fpu.hard.fp_regs[n] = llx >> 32; 402 + tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff; 403 + } else if ((fpscr & FPSCR_CAUSE_ERROR) 404 + && (!prec && ((hx & 0x7fffffff) < 0x00800000 405 + || (hy & 0x7fffffff) < 0x00800000))) { 406 + /* FPU error because of denormal (floats) */ 407 + hx = float32_div(hx, hy); 408 + tsk->thread.fpu.hard.fp_regs[n] = hx; 409 + } else 410 + return 0; 292 411 293 412 regs->pc = nextpc; 294 413 return 1; ··· 421 292 return 0; 422 293 } 423 294 295 + void float_raise(unsigned int flags) 296 + { 297 + fpu_exception_flags |= flags; 298 + } 299 + 300 + int float_rounding_mode(void) 301 + { 302 + struct task_struct *tsk = current; 303 + int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr); 
304 + return roundingMode; 305 + } 306 + 424 307 BUILD_TRAP_HANDLER(fpu_error) 425 308 { 426 309 struct task_struct *tsk = current; 427 310 TRAP_HANDLER_DECL; 428 311 429 - if (ieee_fpe_handler(regs)) 430 - return; 431 - 432 - regs->pc += 2; 433 312 save_fpu(tsk, regs); 313 + fpu_exception_flags = 0; 314 + if (ieee_fpe_handler(regs)) { 315 + tsk->thread.fpu.hard.fpscr &= 316 + ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); 317 + tsk->thread.fpu.hard.fpscr |= fpu_exception_flags; 318 + /* Set the FPSCR flag as well as cause bits - simply 319 + * replicate the cause */ 320 + tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); 321 + grab_fpu(regs); 322 + restore_fpu(tsk); 323 + set_tsk_thread_flag(tsk, TIF_USEDFPU); 324 + if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & 325 + (fpu_exception_flags >> 2)) == 0) { 326 + return; 327 + } 328 + } 329 + 434 330 force_sig(SIGFPE, tsk); 435 331 } 436 332 ··· 473 319 if (used_math()) { 474 320 /* Using the FPU again. */ 475 321 restore_fpu(tsk); 476 - } else { 322 + } else { 477 323 /* First time FPU user. */ 478 324 fpu_init(); 479 325 set_used_math();
+892
arch/sh/kernel/cpu/sh4/softfloat.c
··· 1 + /* 2 + * Floating point emulation support for subnormalised numbers on SH4 3 + * architecture This file is derived from the SoftFloat IEC/IEEE 4 + * Floating-point Arithmetic Package, Release 2 the original license of 5 + * which is reproduced below. 6 + * 7 + * ======================================================================== 8 + * 9 + * This C source file is part of the SoftFloat IEC/IEEE Floating-point 10 + * Arithmetic Package, Release 2. 11 + * 12 + * Written by John R. Hauser. This work was made possible in part by the 13 + * International Computer Science Institute, located at Suite 600, 1947 Center 14 + * Street, Berkeley, California 94704. Funding was partially provided by the 15 + * National Science Foundation under grant MIP-9311980. The original version 16 + * of this code was written as part of a project to build a fixed-point vector 17 + * processor in collaboration with the University of California at Berkeley, 18 + * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 19 + * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 20 + * arithmetic/softfloat.html'. 21 + * 22 + * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 23 + * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 24 + * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 25 + * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 26 + * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 27 + * 28 + * Derivative works are acceptable, even for commercial purposes, so long as 29 + * (1) they include prominent notice that the work is derivative, and (2) they 30 + * include prominent notice akin to these three paragraphs for those parts of 31 + * this code that are retained. 
32 + * 33 + * ======================================================================== 34 + * 35 + * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com> 36 + * and Kamel Khelifi <kamel.khelifi@st.com> 37 + */ 38 + #include <linux/kernel.h> 39 + #include <asm/cpu/fpu.h> 40 + 41 + #define LIT64( a ) a##LL 42 + 43 + typedef char flag; 44 + typedef unsigned char uint8; 45 + typedef signed char int8; 46 + typedef int uint16; 47 + typedef int int16; 48 + typedef unsigned int uint32; 49 + typedef signed int int32; 50 + 51 + typedef unsigned long long int bits64; 52 + typedef signed long long int sbits64; 53 + 54 + typedef unsigned char bits8; 55 + typedef signed char sbits8; 56 + typedef unsigned short int bits16; 57 + typedef signed short int sbits16; 58 + typedef unsigned int bits32; 59 + typedef signed int sbits32; 60 + 61 + typedef unsigned long long int uint64; 62 + typedef signed long long int int64; 63 + 64 + typedef unsigned long int float32; 65 + typedef unsigned long long float64; 66 + 67 + extern void float_raise(unsigned int flags); /* in fpu.c */ 68 + extern int float_rounding_mode(void); /* in fpu.c */ 69 + 70 + inline bits64 extractFloat64Frac(float64 a); 71 + inline flag extractFloat64Sign(float64 a); 72 + inline int16 extractFloat64Exp(float64 a); 73 + inline int16 extractFloat32Exp(float32 a); 74 + inline flag extractFloat32Sign(float32 a); 75 + inline bits32 extractFloat32Frac(float32 a); 76 + inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); 77 + inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); 78 + inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); 79 + inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); 80 + float64 float64_sub(float64 a, float64 b); 81 + float32 float32_sub(float32 a, float32 b); 82 + float32 float32_add(float32 a, float32 b); 83 + float64 float64_add(float64 a, float64 b); 84 + float64 float64_div(float64 a, float64 b); 85 + float32 float32_div(float32 a, 
float32 b); 86 + float32 float32_mul(float32 a, float32 b); 87 + float64 float64_mul(float64 a, float64 b); 88 + inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 89 + bits64 * z1Ptr); 90 + inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 91 + bits64 * z1Ptr); 92 + inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); 93 + 94 + static int8 countLeadingZeros32(bits32 a); 95 + static int8 countLeadingZeros64(bits64 a); 96 + static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, 97 + bits64 zSig); 98 + static float64 subFloat64Sigs(float64 a, float64 b, flag zSign); 99 + static float64 addFloat64Sigs(float64 a, float64 b, flag zSign); 100 + static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig); 101 + static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, 102 + bits32 zSig); 103 + static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig); 104 + static float32 subFloat32Sigs(float32 a, float32 b, flag zSign); 105 + static float32 addFloat32Sigs(float32 a, float32 b, flag zSign); 106 + static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, 107 + bits64 * zSigPtr); 108 + static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b); 109 + static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, 110 + bits32 * zSigPtr); 111 + 112 + inline bits64 extractFloat64Frac(float64 a) 113 + { 114 + return a & LIT64(0x000FFFFFFFFFFFFF); 115 + } 116 + 117 + inline flag extractFloat64Sign(float64 a) 118 + { 119 + return a >> 63; 120 + } 121 + 122 + inline int16 extractFloat64Exp(float64 a) 123 + { 124 + return (a >> 52) & 0x7FF; 125 + } 126 + 127 + inline int16 extractFloat32Exp(float32 a) 128 + { 129 + return (a >> 23) & 0xFF; 130 + } 131 + 132 + inline flag extractFloat32Sign(float32 a) 133 + { 134 + return a >> 31; 135 + } 136 + 137 + inline bits32 extractFloat32Frac(float32 a) 138 + { 139 + return a & 0x007FFFFF; 140 + 
} 141 + 142 + inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) 143 + { 144 + return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; 145 + } 146 + 147 + inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) 148 + { 149 + bits64 z; 150 + 151 + if (count == 0) { 152 + z = a; 153 + } else if (count < 64) { 154 + z = (a >> count) | ((a << ((-count) & 63)) != 0); 155 + } else { 156 + z = (a != 0); 157 + } 158 + *zPtr = z; 159 + } 160 + 161 + static int8 countLeadingZeros32(bits32 a) 162 + { 163 + static const int8 countLeadingZerosHigh[] = { 164 + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 165 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 166 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 167 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 168 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 169 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 170 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 171 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 172 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 180 + }; 181 + int8 shiftCount; 182 + 183 + shiftCount = 0; 184 + if (a < 0x10000) { 185 + shiftCount += 16; 186 + a <<= 16; 187 + } 188 + if (a < 0x1000000) { 189 + shiftCount += 8; 190 + a <<= 8; 191 + } 192 + shiftCount += countLeadingZerosHigh[a >> 24]; 193 + return shiftCount; 194 + 195 + } 196 + 197 + static int8 countLeadingZeros64(bits64 a) 198 + { 199 + int8 shiftCount; 200 + 201 + shiftCount = 0; 202 + if (a < ((bits64) 1) << 32) { 203 + shiftCount += 32; 204 + } else { 205 + a >>= 32; 206 + } 207 + shiftCount += countLeadingZeros32(a); 
208 + return shiftCount; 209 + 210 + } 211 + 212 + static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) 213 + { 214 + int8 shiftCount; 215 + 216 + shiftCount = countLeadingZeros64(zSig) - 1; 217 + return roundAndPackFloat64(zSign, zExp - shiftCount, 218 + zSig << shiftCount); 219 + 220 + } 221 + 222 + static float64 subFloat64Sigs(float64 a, float64 b, flag zSign) 223 + { 224 + int16 aExp, bExp, zExp; 225 + bits64 aSig, bSig, zSig; 226 + int16 expDiff; 227 + 228 + aSig = extractFloat64Frac(a); 229 + aExp = extractFloat64Exp(a); 230 + bSig = extractFloat64Frac(b); 231 + bExp = extractFloat64Exp(b); 232 + expDiff = aExp - bExp; 233 + aSig <<= 10; 234 + bSig <<= 10; 235 + if (0 < expDiff) 236 + goto aExpBigger; 237 + if (expDiff < 0) 238 + goto bExpBigger; 239 + if (aExp == 0) { 240 + aExp = 1; 241 + bExp = 1; 242 + } 243 + if (bSig < aSig) 244 + goto aBigger; 245 + if (aSig < bSig) 246 + goto bBigger; 247 + return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); 248 + bExpBigger: 249 + if (bExp == 0x7FF) { 250 + return packFloat64(zSign ^ 1, 0x7FF, 0); 251 + } 252 + if (aExp == 0) { 253 + ++expDiff; 254 + } else { 255 + aSig |= LIT64(0x4000000000000000); 256 + } 257 + shift64RightJamming(aSig, -expDiff, &aSig); 258 + bSig |= LIT64(0x4000000000000000); 259 + bBigger: 260 + zSig = bSig - aSig; 261 + zExp = bExp; 262 + zSign ^= 1; 263 + goto normalizeRoundAndPack; 264 + aExpBigger: 265 + if (aExp == 0x7FF) { 266 + return a; 267 + } 268 + if (bExp == 0) { 269 + --expDiff; 270 + } else { 271 + bSig |= LIT64(0x4000000000000000); 272 + } 273 + shift64RightJamming(bSig, expDiff, &bSig); 274 + aSig |= LIT64(0x4000000000000000); 275 + aBigger: 276 + zSig = aSig - bSig; 277 + zExp = aExp; 278 + normalizeRoundAndPack: 279 + --zExp; 280 + return normalizeRoundAndPackFloat64(zSign, zExp, zSig); 281 + 282 + } 283 + static float64 addFloat64Sigs(float64 a, float64 b, flag zSign) 284 + { 285 + int16 aExp, bExp, zExp; 286 + bits64 aSig, bSig, 
zSig; 287 + int16 expDiff; 288 + 289 + aSig = extractFloat64Frac(a); 290 + aExp = extractFloat64Exp(a); 291 + bSig = extractFloat64Frac(b); 292 + bExp = extractFloat64Exp(b); 293 + expDiff = aExp - bExp; 294 + aSig <<= 9; 295 + bSig <<= 9; 296 + if (0 < expDiff) { 297 + if (aExp == 0x7FF) { 298 + return a; 299 + } 300 + if (bExp == 0) { 301 + --expDiff; 302 + } else { 303 + bSig |= LIT64(0x2000000000000000); 304 + } 305 + shift64RightJamming(bSig, expDiff, &bSig); 306 + zExp = aExp; 307 + } else if (expDiff < 0) { 308 + if (bExp == 0x7FF) { 309 + return packFloat64(zSign, 0x7FF, 0); 310 + } 311 + if (aExp == 0) { 312 + ++expDiff; 313 + } else { 314 + aSig |= LIT64(0x2000000000000000); 315 + } 316 + shift64RightJamming(aSig, -expDiff, &aSig); 317 + zExp = bExp; 318 + } else { 319 + if (aExp == 0x7FF) { 320 + return a; 321 + } 322 + if (aExp == 0) 323 + return packFloat64(zSign, 0, (aSig + bSig) >> 9); 324 + zSig = LIT64(0x4000000000000000) + aSig + bSig; 325 + zExp = aExp; 326 + goto roundAndPack; 327 + } 328 + aSig |= LIT64(0x2000000000000000); 329 + zSig = (aSig + bSig) << 1; 330 + --zExp; 331 + if ((sbits64) zSig < 0) { 332 + zSig = aSig + bSig; 333 + ++zExp; 334 + } 335 + roundAndPack: 336 + return roundAndPackFloat64(zSign, zExp, zSig); 337 + 338 + } 339 + 340 + inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) 341 + { 342 + return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; 343 + } 344 + 345 + inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) 346 + { 347 + bits32 z; 348 + if (count == 0) { 349 + z = a; 350 + } else if (count < 32) { 351 + z = (a >> count) | ((a << ((-count) & 31)) != 0); 352 + } else { 353 + z = (a != 0); 354 + } 355 + *zPtr = z; 356 + } 357 + 358 + static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) 359 + { 360 + flag roundNearestEven; 361 + int8 roundIncrement, roundBits; 362 + flag isTiny; 363 + 364 + /* SH4 has only 2 rounding modes - round to nearest and round to zero */ 
365 + roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); 366 + roundIncrement = 0x40; 367 + if (!roundNearestEven) { 368 + roundIncrement = 0; 369 + } 370 + roundBits = zSig & 0x7F; 371 + if (0xFD <= (bits16) zExp) { 372 + if ((0xFD < zExp) 373 + || ((zExp == 0xFD) 374 + && ((sbits32) (zSig + roundIncrement) < 0)) 375 + ) { 376 + float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); 377 + return packFloat32(zSign, 0xFF, 378 + 0) - (roundIncrement == 0); 379 + } 380 + if (zExp < 0) { 381 + isTiny = (zExp < -1) 382 + || (zSig + roundIncrement < 0x80000000); 383 + shift32RightJamming(zSig, -zExp, &zSig); 384 + zExp = 0; 385 + roundBits = zSig & 0x7F; 386 + if (isTiny && roundBits) 387 + float_raise(FPSCR_CAUSE_UNDERFLOW); 388 + } 389 + } 390 + if (roundBits) 391 + float_raise(FPSCR_CAUSE_INEXACT); 392 + zSig = (zSig + roundIncrement) >> 7; 393 + zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); 394 + if (zSig == 0) 395 + zExp = 0; 396 + return packFloat32(zSign, zExp, zSig); 397 + 398 + } 399 + 400 + static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) 401 + { 402 + int8 shiftCount; 403 + 404 + shiftCount = countLeadingZeros32(zSig) - 1; 405 + return roundAndPackFloat32(zSign, zExp - shiftCount, 406 + zSig << shiftCount); 407 + } 408 + 409 + static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) 410 + { 411 + flag roundNearestEven; 412 + int16 roundIncrement, roundBits; 413 + flag isTiny; 414 + 415 + /* SH4 has only 2 rounding modes - round to nearest and round to zero */ 416 + roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); 417 + roundIncrement = 0x200; 418 + if (!roundNearestEven) { 419 + roundIncrement = 0; 420 + } 421 + roundBits = zSig & 0x3FF; 422 + if (0x7FD <= (bits16) zExp) { 423 + if ((0x7FD < zExp) 424 + || ((zExp == 0x7FD) 425 + && ((sbits64) (zSig + roundIncrement) < 0)) 426 + ) { 427 + float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); 428 + return 
packFloat64(zSign, 0x7FF, 429 + 0) - (roundIncrement == 0); 430 + } 431 + if (zExp < 0) { 432 + isTiny = (zExp < -1) 433 + || (zSig + roundIncrement < 434 + LIT64(0x8000000000000000)); 435 + shift64RightJamming(zSig, -zExp, &zSig); 436 + zExp = 0; 437 + roundBits = zSig & 0x3FF; 438 + if (isTiny && roundBits) 439 + float_raise(FPSCR_CAUSE_UNDERFLOW); 440 + } 441 + } 442 + if (roundBits) 443 + float_raise(FPSCR_CAUSE_INEXACT); 444 + zSig = (zSig + roundIncrement) >> 10; 445 + zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven); 446 + if (zSig == 0) 447 + zExp = 0; 448 + return packFloat64(zSign, zExp, zSig); 449 + 450 + } 451 + 452 + static float32 subFloat32Sigs(float32 a, float32 b, flag zSign) 453 + { 454 + int16 aExp, bExp, zExp; 455 + bits32 aSig, bSig, zSig; 456 + int16 expDiff; 457 + 458 + aSig = extractFloat32Frac(a); 459 + aExp = extractFloat32Exp(a); 460 + bSig = extractFloat32Frac(b); 461 + bExp = extractFloat32Exp(b); 462 + expDiff = aExp - bExp; 463 + aSig <<= 7; 464 + bSig <<= 7; 465 + if (0 < expDiff) 466 + goto aExpBigger; 467 + if (expDiff < 0) 468 + goto bExpBigger; 469 + if (aExp == 0) { 470 + aExp = 1; 471 + bExp = 1; 472 + } 473 + if (bSig < aSig) 474 + goto aBigger; 475 + if (aSig < bSig) 476 + goto bBigger; 477 + return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); 478 + bExpBigger: 479 + if (bExp == 0xFF) { 480 + return packFloat32(zSign ^ 1, 0xFF, 0); 481 + } 482 + if (aExp == 0) { 483 + ++expDiff; 484 + } else { 485 + aSig |= 0x40000000; 486 + } 487 + shift32RightJamming(aSig, -expDiff, &aSig); 488 + bSig |= 0x40000000; 489 + bBigger: 490 + zSig = bSig - aSig; 491 + zExp = bExp; 492 + zSign ^= 1; 493 + goto normalizeRoundAndPack; 494 + aExpBigger: 495 + if (aExp == 0xFF) { 496 + return a; 497 + } 498 + if (bExp == 0) { 499 + --expDiff; 500 + } else { 501 + bSig |= 0x40000000; 502 + } 503 + shift32RightJamming(bSig, expDiff, &bSig); 504 + aSig |= 0x40000000; 505 + aBigger: 506 + zSig = aSig - bSig; 507 + zExp = aExp; 508 + 
normalizeRoundAndPack: 509 + --zExp; 510 + return normalizeRoundAndPackFloat32(zSign, zExp, zSig); 511 + 512 + } 513 + 514 + static float32 addFloat32Sigs(float32 a, float32 b, flag zSign) 515 + { 516 + int16 aExp, bExp, zExp; 517 + bits32 aSig, bSig, zSig; 518 + int16 expDiff; 519 + 520 + aSig = extractFloat32Frac(a); 521 + aExp = extractFloat32Exp(a); 522 + bSig = extractFloat32Frac(b); 523 + bExp = extractFloat32Exp(b); 524 + expDiff = aExp - bExp; 525 + aSig <<= 6; 526 + bSig <<= 6; 527 + if (0 < expDiff) { 528 + if (aExp == 0xFF) { 529 + return a; 530 + } 531 + if (bExp == 0) { 532 + --expDiff; 533 + } else { 534 + bSig |= 0x20000000; 535 + } 536 + shift32RightJamming(bSig, expDiff, &bSig); 537 + zExp = aExp; 538 + } else if (expDiff < 0) { 539 + if (bExp == 0xFF) { 540 + return packFloat32(zSign, 0xFF, 0); 541 + } 542 + if (aExp == 0) { 543 + ++expDiff; 544 + } else { 545 + aSig |= 0x20000000; 546 + } 547 + shift32RightJamming(aSig, -expDiff, &aSig); 548 + zExp = bExp; 549 + } else { 550 + if (aExp == 0xFF) { 551 + return a; 552 + } 553 + if (aExp == 0) 554 + return packFloat32(zSign, 0, (aSig + bSig) >> 6); 555 + zSig = 0x40000000 + aSig + bSig; 556 + zExp = aExp; 557 + goto roundAndPack; 558 + } 559 + aSig |= 0x20000000; 560 + zSig = (aSig + bSig) << 1; 561 + --zExp; 562 + if ((sbits32) zSig < 0) { 563 + zSig = aSig + bSig; 564 + ++zExp; 565 + } 566 + roundAndPack: 567 + return roundAndPackFloat32(zSign, zExp, zSig); 568 + 569 + } 570 + 571 + float64 float64_sub(float64 a, float64 b) 572 + { 573 + flag aSign, bSign; 574 + 575 + aSign = extractFloat64Sign(a); 576 + bSign = extractFloat64Sign(b); 577 + if (aSign == bSign) { 578 + return subFloat64Sigs(a, b, aSign); 579 + } else { 580 + return addFloat64Sigs(a, b, aSign); 581 + } 582 + 583 + } 584 + 585 + float32 float32_sub(float32 a, float32 b) 586 + { 587 + flag aSign, bSign; 588 + 589 + aSign = extractFloat32Sign(a); 590 + bSign = extractFloat32Sign(b); 591 + if (aSign == bSign) { 592 + return 
subFloat32Sigs(a, b, aSign); 593 + } else { 594 + return addFloat32Sigs(a, b, aSign); 595 + } 596 + 597 + } 598 + 599 + float32 float32_add(float32 a, float32 b) 600 + { 601 + flag aSign, bSign; 602 + 603 + aSign = extractFloat32Sign(a); 604 + bSign = extractFloat32Sign(b); 605 + if (aSign == bSign) { 606 + return addFloat32Sigs(a, b, aSign); 607 + } else { 608 + return subFloat32Sigs(a, b, aSign); 609 + } 610 + 611 + } 612 + 613 + float64 float64_add(float64 a, float64 b) 614 + { 615 + flag aSign, bSign; 616 + 617 + aSign = extractFloat64Sign(a); 618 + bSign = extractFloat64Sign(b); 619 + if (aSign == bSign) { 620 + return addFloat64Sigs(a, b, aSign); 621 + } else { 622 + return subFloat64Sigs(a, b, aSign); 623 + } 624 + } 625 + 626 + static void 627 + normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr) 628 + { 629 + int8 shiftCount; 630 + 631 + shiftCount = countLeadingZeros64(aSig) - 11; 632 + *zSigPtr = aSig << shiftCount; 633 + *zExpPtr = 1 - shiftCount; 634 + } 635 + 636 + inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 637 + bits64 * z1Ptr) 638 + { 639 + bits64 z1; 640 + 641 + z1 = a1 + b1; 642 + *z1Ptr = z1; 643 + *z0Ptr = a0 + b0 + (z1 < a1); 644 + } 645 + 646 + inline void 647 + sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 648 + bits64 * z1Ptr) 649 + { 650 + *z1Ptr = a1 - b1; 651 + *z0Ptr = a0 - b0 - (a1 < b1); 652 + } 653 + 654 + static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) 655 + { 656 + bits64 b0, b1; 657 + bits64 rem0, rem1, term0, term1; 658 + bits64 z; 659 + if (b <= a0) 660 + return LIT64(0xFFFFFFFFFFFFFFFF); 661 + b0 = b >> 32; 662 + z = (b0 << 32 <= a0) ? 
LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32; 663 + mul64To128(b, z, &term0, &term1); 664 + sub128(a0, a1, term0, term1, &rem0, &rem1); 665 + while (((sbits64) rem0) < 0) { 666 + z -= LIT64(0x100000000); 667 + b1 = b << 32; 668 + add128(rem0, rem1, b0, b1, &rem0, &rem1); 669 + } 670 + rem0 = (rem0 << 32) | (rem1 >> 32); 671 + z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; 672 + return z; 673 + } 674 + 675 + inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) 676 + { 677 + bits32 aHigh, aLow, bHigh, bLow; 678 + bits64 z0, zMiddleA, zMiddleB, z1; 679 + 680 + aLow = a; 681 + aHigh = a >> 32; 682 + bLow = b; 683 + bHigh = b >> 32; 684 + z1 = ((bits64) aLow) * bLow; 685 + zMiddleA = ((bits64) aLow) * bHigh; 686 + zMiddleB = ((bits64) aHigh) * bLow; 687 + z0 = ((bits64) aHigh) * bHigh; 688 + zMiddleA += zMiddleB; 689 + z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32); 690 + zMiddleA <<= 32; 691 + z1 += zMiddleA; 692 + z0 += (z1 < zMiddleA); 693 + *z1Ptr = z1; 694 + *z0Ptr = z0; 695 + 696 + } 697 + 698 + static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, 699 + bits32 * zSigPtr) 700 + { 701 + int8 shiftCount; 702 + 703 + shiftCount = countLeadingZeros32(aSig) - 8; 704 + *zSigPtr = aSig << shiftCount; 705 + *zExpPtr = 1 - shiftCount; 706 + 707 + } 708 + 709 + float64 float64_div(float64 a, float64 b) 710 + { 711 + flag aSign, bSign, zSign; 712 + int16 aExp, bExp, zExp; 713 + bits64 aSig, bSig, zSig; 714 + bits64 rem0, rem1; 715 + bits64 term0, term1; 716 + 717 + aSig = extractFloat64Frac(a); 718 + aExp = extractFloat64Exp(a); 719 + aSign = extractFloat64Sign(a); 720 + bSig = extractFloat64Frac(b); 721 + bExp = extractFloat64Exp(b); 722 + bSign = extractFloat64Sign(b); 723 + zSign = aSign ^ bSign; 724 + if (aExp == 0x7FF) { 725 + if (bExp == 0x7FF) { 726 + } 727 + return packFloat64(zSign, 0x7FF, 0); 728 + } 729 + if (bExp == 0x7FF) { 730 + return packFloat64(zSign, 0, 0); 731 + } 732 + if (bExp == 0) { 733 + if 
(bSig == 0) { 734 + if ((aExp | aSig) == 0) { 735 + float_raise(FPSCR_CAUSE_INVALID); 736 + } 737 + return packFloat64(zSign, 0x7FF, 0); 738 + } 739 + normalizeFloat64Subnormal(bSig, &bExp, &bSig); 740 + } 741 + if (aExp == 0) { 742 + if (aSig == 0) 743 + return packFloat64(zSign, 0, 0); 744 + normalizeFloat64Subnormal(aSig, &aExp, &aSig); 745 + } 746 + zExp = aExp - bExp + 0x3FD; 747 + aSig = (aSig | LIT64(0x0010000000000000)) << 10; 748 + bSig = (bSig | LIT64(0x0010000000000000)) << 11; 749 + if (bSig <= (aSig + aSig)) { 750 + aSig >>= 1; 751 + ++zExp; 752 + } 753 + zSig = estimateDiv128To64(aSig, 0, bSig); 754 + if ((zSig & 0x1FF) <= 2) { 755 + mul64To128(bSig, zSig, &term0, &term1); 756 + sub128(aSig, 0, term0, term1, &rem0, &rem1); 757 + while ((sbits64) rem0 < 0) { 758 + --zSig; 759 + add128(rem0, rem1, 0, bSig, &rem0, &rem1); 760 + } 761 + zSig |= (rem1 != 0); 762 + } 763 + return roundAndPackFloat64(zSign, zExp, zSig); 764 + 765 + } 766 + 767 + float32 float32_div(float32 a, float32 b) 768 + { 769 + flag aSign, bSign, zSign; 770 + int16 aExp, bExp, zExp; 771 + bits32 aSig, bSig, zSig; 772 + 773 + aSig = extractFloat32Frac(a); 774 + aExp = extractFloat32Exp(a); 775 + aSign = extractFloat32Sign(a); 776 + bSig = extractFloat32Frac(b); 777 + bExp = extractFloat32Exp(b); 778 + bSign = extractFloat32Sign(b); 779 + zSign = aSign ^ bSign; 780 + if (aExp == 0xFF) { 781 + if (bExp == 0xFF) { 782 + } 783 + return packFloat32(zSign, 0xFF, 0); 784 + } 785 + if (bExp == 0xFF) { 786 + return packFloat32(zSign, 0, 0); 787 + } 788 + if (bExp == 0) { 789 + if (bSig == 0) { 790 + return packFloat32(zSign, 0xFF, 0); 791 + } 792 + normalizeFloat32Subnormal(bSig, &bExp, &bSig); 793 + } 794 + if (aExp == 0) { 795 + if (aSig == 0) 796 + return packFloat32(zSign, 0, 0); 797 + normalizeFloat32Subnormal(aSig, &aExp, &aSig); 798 + } 799 + zExp = aExp - bExp + 0x7D; 800 + aSig = (aSig | 0x00800000) << 7; 801 + bSig = (bSig | 0x00800000) << 8; 802 + if (bSig <= (aSig + aSig)) { 803 + 
aSig >>= 1; 804 + ++zExp; 805 + } 806 + zSig = (((bits64) aSig) << 32) / bSig; 807 + if ((zSig & 0x3F) == 0) { 808 + zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); 809 + } 810 + return roundAndPackFloat32(zSign, zExp, zSig); 811 + 812 + } 813 + 814 + float32 float32_mul(float32 a, float32 b) 815 + { 816 + char aSign, bSign, zSign; 817 + int aExp, bExp, zExp; 818 + unsigned int aSig, bSig; 819 + unsigned long long zSig64; 820 + unsigned int zSig; 821 + 822 + aSig = extractFloat32Frac(a); 823 + aExp = extractFloat32Exp(a); 824 + aSign = extractFloat32Sign(a); 825 + bSig = extractFloat32Frac(b); 826 + bExp = extractFloat32Exp(b); 827 + bSign = extractFloat32Sign(b); 828 + zSign = aSign ^ bSign; 829 + if (aExp == 0) { 830 + if (aSig == 0) 831 + return packFloat32(zSign, 0, 0); 832 + normalizeFloat32Subnormal(aSig, &aExp, &aSig); 833 + } 834 + if (bExp == 0) { 835 + if (bSig == 0) 836 + return packFloat32(zSign, 0, 0); 837 + normalizeFloat32Subnormal(bSig, &bExp, &bSig); 838 + } 839 + if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0)) 840 + return roundAndPackFloat32(zSign, 0xff, 0); 841 + 842 + zExp = aExp + bExp - 0x7F; 843 + aSig = (aSig | 0x00800000) << 7; 844 + bSig = (bSig | 0x00800000) << 8; 845 + shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64); 846 + zSig = zSig64; 847 + if (0 <= (signed int)(zSig << 1)) { 848 + zSig <<= 1; 849 + --zExp; 850 + } 851 + return roundAndPackFloat32(zSign, zExp, zSig); 852 + 853 + } 854 + 855 + float64 float64_mul(float64 a, float64 b) 856 + { 857 + char aSign, bSign, zSign; 858 + int aExp, bExp, zExp; 859 + unsigned long long int aSig, bSig, zSig0, zSig1; 860 + 861 + aSig = extractFloat64Frac(a); 862 + aExp = extractFloat64Exp(a); 863 + aSign = extractFloat64Sign(a); 864 + bSig = extractFloat64Frac(b); 865 + bExp = extractFloat64Exp(b); 866 + bSign = extractFloat64Sign(b); 867 + zSign = aSign ^ bSign; 868 + 869 + if (aExp == 0) { 870 + if (aSig == 0) 871 + return packFloat64(zSign, 0, 0); 
872 + normalizeFloat64Subnormal(aSig, &aExp, &aSig); 873 + } 874 + if (bExp == 0) { 875 + if (bSig == 0) 876 + return packFloat64(zSign, 0, 0); 877 + normalizeFloat64Subnormal(bSig, &bExp, &bSig); 878 + } 879 + if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0)) 880 + return roundAndPackFloat64(zSign, 0x7ff, 0); 881 + 882 + zExp = aExp + bExp - 0x3FF; 883 + aSig = (aSig | 0x0010000000000000LL) << 10; 884 + bSig = (bSig | 0x0010000000000000LL) << 11; 885 + mul64To128(aSig, bSig, &zSig0, &zSig1); 886 + zSig0 |= (zSig1 != 0); 887 + if (0 <= (signed long long int)(zSig0 << 1)) { 888 + zSig0 <<= 1; 889 + --zExp; 890 + } 891 + return roundAndPackFloat64(zSign, zExp, zSig0); 892 + }
+32
include/asm-sh/cpu-sh4/fpu.h
/*
 * include/asm-sh/cpu-sh4/fpu.h
 *
 * Copyright (C) 2006 STMicroelectronics Limited
 * Author: Carl Shaw <carl.shaw@st.com>
 *
 * May be copied or modified under the terms of the GNU General Public
 * License Version 2. See linux/COPYING for more information.
 *
 * Definitions for SH4 FPU operations
 */

#ifndef __CPU_SH4_FPU_H
#define __CPU_SH4_FPU_H

/* Mask of the FPSCR exception-enable field (bits 11:7) */
#define FPSCR_ENABLE_MASK 0x00000f80UL

/* Instruction bit distinguishing a 64-bit (paired-register) fmov */
#define FPSCR_FMOV_DOUBLE (1<<1)

/* FPSCR cause bits (bits 17:12), set when an exception is detected */
#define FPSCR_CAUSE_INEXACT (1<<12)
#define FPSCR_CAUSE_UNDERFLOW (1<<13)
#define FPSCR_CAUSE_OVERFLOW (1<<14)
#define FPSCR_CAUSE_DIVZERO (1<<15)
#define FPSCR_CAUSE_INVALID (1<<16)
#define FPSCR_CAUSE_ERROR (1<<17)

/* PR bit: operations execute in double precision when set */
#define FPSCR_DBL_PRECISION (1<<19)
/*
 * Rounding-mode accessor.  Argument now parenthesized so expression
 * arguments expand correctly.  NOTE(review): this reads bits 21:20,
 * while the SH7750 manual places RM in FPSCR bits 1:0 - confirm against
 * how the callers store the mode.
 */
#define FPSCR_ROUNDING_MODE(x) (((x) >> 20) & 3)
#define FPSCR_RM_NEAREST (0)
#define FPSCR_RM_ZERO (1)

#endif /* __CPU_SH4_FPU_H */