Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: math-emu: Add support for the MIPS R6 MSUBF FPU instruction

MIPS R6 introduced the following instruction:
Floating Point Fused Multiply Subtract:
MSUBF.fmt To perform a fused multiply-subtract of FP values.

MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/10957/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

authored by

Markos Chandras and committed by
Ralf Baechle
83d43305 e24c3bec

+559 -2
+2 -2
arch/mips/math-emu/Makefile
··· 4 4 5 5 obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ 6 6 dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ 7 - dp_tint.o dp_fint.o dp_maddf.o \ 7 + dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o \ 8 8 sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ 9 - sp_tint.o sp_fint.o sp_maddf.o \ 9 + sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o \ 10 10 dsemul.o 11 11 12 12 lib-y += ieee754d.o \
+26
arch/mips/math-emu/cp1emu.c
··· 1778 1778 break; 1779 1779 } 1780 1780 1781 + case fmsubf_op: { 1782 + union ieee754sp ft, fs, fd; 1783 + 1784 + if (!cpu_has_mips_r6) 1785 + return SIGILL; 1786 + 1787 + SPFROMREG(ft, MIPSInst_FT(ir)); 1788 + SPFROMREG(fs, MIPSInst_FS(ir)); 1789 + SPFROMREG(fd, MIPSInst_FD(ir)); 1790 + rv.s = ieee754sp_msubf(fd, fs, ft); 1791 + break; 1792 + } 1793 + 1781 1794 case fabs_op: 1782 1795 handler.u = ieee754sp_abs; 1783 1796 goto scopuop; ··· 2021 2008 DPFROMREG(fs, MIPSInst_FS(ir)); 2022 2009 DPFROMREG(fd, MIPSInst_FD(ir)); 2023 2010 rv.d = ieee754dp_maddf(fd, fs, ft); 2011 + break; 2012 + } 2013 + 2014 + case fmsubf_op: { 2015 + union ieee754dp ft, fs, fd; 2016 + 2017 + if (!cpu_has_mips_r6) 2018 + return SIGILL; 2019 + 2020 + DPFROMREG(ft, MIPSInst_FT(ir)); 2021 + DPFROMREG(fs, MIPSInst_FS(ir)); 2022 + DPFROMREG(fd, MIPSInst_FD(ir)); 2023 + rv.d = ieee754dp_msubf(fd, fs, ft); 2024 2024 break; 2025 2025 } 2026 2026
+269
arch/mips/math-emu/dp_msubf.c
··· 1 + /* 2 + * IEEE754 floating point arithmetic 3 + * double precision: MSUB.f (Fused Multiply Subtract) 4 + * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) 5 + * 6 + * MIPS floating point support 7 + * Copyright (C) 2015 Imagination Technologies, Ltd. 8 + * Author: Markos Chandras <markos.chandras@imgtec.com> 9 + * 10 + * This program is free software; you can distribute it and/or modify it 11 + * under the terms of the GNU General Public License as published by the 12 + * Free Software Foundation; version 2 of the License. 13 + */ 14 + 15 + #include "ieee754dp.h" 16 + 17 + union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 18 + union ieee754dp y) 19 + { 20 + int re; 21 + int rs; 22 + u64 rm; 23 + unsigned lxm; 24 + unsigned hxm; 25 + unsigned lym; 26 + unsigned hym; 27 + u64 lrm; 28 + u64 hrm; 29 + u64 t; 30 + u64 at; 31 + int s; 32 + 33 + COMPXDP; 34 + COMPYDP; 35 + 36 + u64 zm; int ze; int zs __maybe_unused; int zc; 37 + 38 + EXPLODEXDP; 39 + EXPLODEYDP; 40 + EXPLODEDP(z, zc, zs, ze, zm) 41 + 42 + FLUSHXDP; 43 + FLUSHYDP; 44 + FLUSHDP(z, zc, zs, ze, zm); 45 + 46 + ieee754_clearcx(); 47 + 48 + switch (zc) { 49 + case IEEE754_CLASS_SNAN: 50 + ieee754_setcx(IEEE754_INVALID_OPERATION); 51 + return ieee754dp_nanxcpt(z); 52 + case IEEE754_CLASS_DNORM: 53 + DPDNORMx(zm, ze); 54 + /* QNAN is handled separately below */ 55 + } 56 + 57 + switch (CLPAIR(xc, yc)) { 58 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 59 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 60 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 61 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 62 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 63 + return ieee754dp_nanxcpt(y); 64 + 65 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 66 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 67 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 68 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 69 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 70 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 71 + return ieee754dp_nanxcpt(x); 72 + 73 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 74 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 75 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 76 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 77 + return y; 78 + 79 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 80 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 81 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 82 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 83 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 84 + return x; 85 + 86 + 87 + /* 88 + * Infinity handling 89 + */ 90 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 91 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 92 + if (zc == IEEE754_CLASS_QNAN) 93 + return z; 94 + ieee754_setcx(IEEE754_INVALID_OPERATION); 95 + return ieee754dp_indef(); 96 + 97 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 98 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 99 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 100 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 101 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 102 + if (zc == IEEE754_CLASS_QNAN) 103 + return z; 104 + return ieee754dp_inf(xs ^ ys); 105 + 106 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 107 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 108 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 109 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 110 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 111 + if (zc == IEEE754_CLASS_INF) 112 + return ieee754dp_inf(zs); 113 + /* Multiplication is 0 so just return z */ 114 + return z; 115 + 116 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 117 + DPDNORMX; 118 + 119 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 120 + if (zc == IEEE754_CLASS_QNAN) 121 + return z; 122 + else if (zc == IEEE754_CLASS_INF) 123 + return ieee754dp_inf(zs); 124 + DPDNORMY; 125 + break; 126 + 127 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 128 + if (zc == IEEE754_CLASS_QNAN) 129 + return z; 130 + else if (zc == IEEE754_CLASS_INF) 131 + return ieee754dp_inf(zs); 132 + DPDNORMX; 133 + break; 134 + 135 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 136 + if (zc == IEEE754_CLASS_QNAN) 137 + return z; 138 + else if (zc == IEEE754_CLASS_INF) 139 + return ieee754dp_inf(zs); 140 + /* fall through to real computations */ 141 + } 142 + 143 + /* Finally get to do some computation */ 144 + 145 + /* 146 + * Do the multiplication bit first 147 + * 148 + * rm = xm * ym, re = xe + ye basically 149 + * 150 + * At this point xm and ym should have been normalized. 151 + */ 152 + assert(xm & DP_HIDDEN_BIT); 153 + assert(ym & DP_HIDDEN_BIT); 154 + 155 + re = xe + ye; 156 + rs = xs ^ ys; 157 + 158 + /* shunt to top of word */ 159 + xm <<= 64 - (DP_FBITS + 1); 160 + ym <<= 64 - (DP_FBITS + 1); 161 + 162 + /* 163 + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. 164 + */ 165 + 166 + /* 32 * 32 => 64 */ 167 + #define DPXMULT(x, y) ((u64)(x) * (u64)y) 168 + 169 + lxm = xm; 170 + hxm = xm >> 32; 171 + lym = ym; 172 + hym = ym >> 32; 173 + 174 + lrm = DPXMULT(lxm, lym); 175 + hrm = DPXMULT(hxm, hym); 176 + 177 + t = DPXMULT(lxm, hym); 178 + 179 + at = lrm + (t << 32); 180 + hrm += at < lrm; 181 + lrm = at; 182 + 183 + hrm = hrm + (t >> 32); 184 + 185 + t = DPXMULT(hxm, lym); 186 + 187 + at = lrm + (t << 32); 188 + hrm += at < lrm; 189 + lrm = at; 190 + 191 + hrm = hrm + (t >> 32); 192 + 193 + rm = hrm | (lrm != 0); 194 + 195 + /* 196 + * Sticky shift down to normal rounding precision. 197 + */ 198 + if ((s64) rm < 0) { 199 + rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | 200 + ((rm << (DP_FBITS + 1 + 3)) != 0); 201 + re++; 202 + } else { 203 + rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | 204 + ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); 205 + } 206 + assert(rm & (DP_HIDDEN_BIT << 3)); 207 + 208 + /* And now the subtraction */ 209 + 210 + /* flip sign of r and handle as add */ 211 + rs ^= 1; 212 + 213 + assert(zm & DP_HIDDEN_BIT); 214 + 215 + /* 216 + * Provide guard,round and stick bit space. 217 + */ 218 + zm <<= 3; 219 + 220 + if (ze > re) { 221 + /* 222 + * Have to shift y fraction right to align. 223 + */ 224 + s = ze - re; 225 + rm = XDPSRS(rm, s); 226 + re += s; 227 + } else if (re > ze) { 228 + /* 229 + * Have to shift x fraction right to align. 230 + */ 231 + s = re - ze; 232 + zm = XDPSRS(zm, s); 233 + ze += s; 234 + } 235 + assert(ze == re); 236 + assert(ze <= DP_EMAX); 237 + 238 + if (zs == rs) { 239 + /* 240 + * Generate 28 bit result of adding two 27 bit numbers 241 + * leaving result in xm, xs and xe. 242 + */ 243 + zm = zm + rm; 244 + 245 + if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ 246 + zm = XDPSRS1(zm); 247 + ze++; 248 + } 249 + } else { 250 + if (zm >= rm) { 251 + zm = zm - rm; 252 + } else { 253 + zm = rm - zm; 254 + zs = rs; 255 + } 256 + if (zm == 0) 257 + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 258 + 259 + /* 260 + * Normalize to rounding precision. 261 + */ 262 + while ((zm >> (DP_FBITS + 3)) == 0) { 263 + zm <<= 1; 264 + ze--; 265 + } 266 + } 267 + 268 + return ieee754dp_format(zs, ze, zm); 269 + }
+4
arch/mips/math-emu/ieee754.h
··· 77 77 78 78 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 79 79 union ieee754sp y); 80 + union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 81 + union ieee754sp y); 80 82 81 83 /* 82 84 * double precision (often aka double) ··· 105 103 union ieee754dp ieee754dp_sqrt(union ieee754dp x); 106 104 107 105 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 106 + union ieee754dp y); 107 + union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 108 108 union ieee754dp y); 109 109 110 110
+258
arch/mips/math-emu/sp_msubf.c
··· 1 + /* 2 + * IEEE754 floating point arithmetic 3 + * single precision: MSUB.f (Fused Multiply Subtract) 4 + * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) 5 + * 6 + * MIPS floating point support 7 + * Copyright (C) 2015 Imagination Technologies, Ltd. 8 + * Author: Markos Chandras <markos.chandras@imgtec.com> 9 + * 10 + * This program is free software; you can distribute it and/or modify it 11 + * under the terms of the GNU General Public License as published by the 12 + * Free Software Foundation; version 2 of the License. 13 + */ 14 + 15 + #include "ieee754sp.h" 16 + 17 + union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 18 + union ieee754sp y) 19 + { 20 + int re; 21 + int rs; 22 + unsigned rm; 23 + unsigned short lxm; 24 + unsigned short hxm; 25 + unsigned short lym; 26 + unsigned short hym; 27 + unsigned lrm; 28 + unsigned hrm; 29 + unsigned t; 30 + unsigned at; 31 + int s; 32 + 33 + COMPXSP; 34 + COMPYSP; 35 + u32 zm; int ze; int zs __maybe_unused; int zc; 36 + 37 + EXPLODEXSP; 38 + EXPLODEYSP; 39 + EXPLODESP(z, zc, zs, ze, zm) 40 + 41 + FLUSHXSP; 42 + FLUSHYSP; 43 + FLUSHSP(z, zc, zs, ze, zm); 44 + 45 + ieee754_clearcx(); 46 + 47 + switch (zc) { 48 + case IEEE754_CLASS_SNAN: 49 + ieee754_setcx(IEEE754_INVALID_OPERATION); 50 + return ieee754sp_nanxcpt(z); 51 + case IEEE754_CLASS_DNORM: 52 + SPDNORMx(zm, ze); 53 + /* QNAN is handled separately below */ 54 + } 55 + 56 + switch (CLPAIR(xc, yc)) { 57 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 58 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 59 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 60 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 61 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 62 + return ieee754sp_nanxcpt(y); 63 + 64 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 65 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 66 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 67 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 68 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 69 + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 70 + return ieee754sp_nanxcpt(x); 71 + 72 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 73 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 74 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 75 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 76 + return y; 77 + 78 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 79 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 80 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 81 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 82 + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 83 + return x; 84 + 85 + /* 86 + * Infinity handling 87 + */ 88 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 89 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 90 + if (zc == IEEE754_CLASS_QNAN) 91 + return z; 92 + ieee754_setcx(IEEE754_INVALID_OPERATION); 93 + return ieee754sp_indef(); 94 + 95 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 96 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 97 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 98 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 99 + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 100 + if (zc == IEEE754_CLASS_QNAN) 101 + return z; 102 + return ieee754sp_inf(xs ^ ys); 103 + 104 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 105 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 106 + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 107 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 108 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 109 + if (zc == IEEE754_CLASS_INF) 110 + return ieee754sp_inf(zs); 111 + /* Multiplication is 0 so just return z */ 112 + return z; 113 + 114 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 115 + SPDNORMX; 116 + 117 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 118 + if (zc == IEEE754_CLASS_QNAN) 119 + return z; 120 + else if (zc == IEEE754_CLASS_INF) 121 + return ieee754sp_inf(zs); 122 + SPDNORMY; 123 + break; 124 + 125 + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 126 + if (zc == IEEE754_CLASS_QNAN) 127 + return z; 128 + else if (zc == IEEE754_CLASS_INF) 129 + return ieee754sp_inf(zs); 130 + SPDNORMX; 131 + break; 132 + 133 + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 134 + if (zc == IEEE754_CLASS_QNAN) 135 + return z; 136 + else if (zc == IEEE754_CLASS_INF) 137 + return ieee754sp_inf(zs); 138 + /* fall through to real compuation */ 139 + } 140 + 141 + /* Finally get to do some computation */ 142 + 143 + /* 144 + * Do the multiplication bit first 145 + * 146 + * rm = xm * ym, re = xe + ye basically 147 + * 148 + * At this point xm and ym should have been normalized. 149 + */ 150 + 151 + /* rm = xm * ym, re = xe+ye basically */ 152 + assert(xm & SP_HIDDEN_BIT); 153 + assert(ym & SP_HIDDEN_BIT); 154 + 155 + re = xe + ye; 156 + rs = xs ^ ys; 157 + 158 + /* shunt to top of word */ 159 + xm <<= 32 - (SP_FBITS + 1); 160 + ym <<= 32 - (SP_FBITS + 1); 161 + 162 + /* 163 + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. 164 + */ 165 + lxm = xm & 0xffff; 166 + hxm = xm >> 16; 167 + lym = ym & 0xffff; 168 + hym = ym >> 16; 169 + 170 + lrm = lxm * lym; /* 16 * 16 => 32 */ 171 + hrm = hxm * hym; /* 16 * 16 => 32 */ 172 + 173 + t = lxm * hym; /* 16 * 16 => 32 */ 174 + at = lrm + (t << 16); 175 + hrm += at < lrm; 176 + lrm = at; 177 + hrm = hrm + (t >> 16); 178 + 179 + t = hxm * lym; /* 16 * 16 => 32 */ 180 + at = lrm + (t << 16); 181 + hrm += at < lrm; 182 + lrm = at; 183 + hrm = hrm + (t >> 16); 184 + 185 + rm = hrm | (lrm != 0); 186 + 187 + /* 188 + * Sticky shift down to normal rounding precision. 189 + */ 190 + if ((int) rm < 0) { 191 + rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | 192 + ((rm << (SP_FBITS + 1 + 3)) != 0); 193 + re++; 194 + } else { 195 + rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | 196 + ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); 197 + } 198 + assert(rm & (SP_HIDDEN_BIT << 3)); 199 + 200 + /* And now the subtraction */ 201 + 202 + /* Flip sign of r and handle as add */ 203 + rs ^= 1; 204 + 205 + assert(zm & SP_HIDDEN_BIT); 206 + 207 + /* 208 + * Provide guard,round and stick bit space. 209 + */ 210 + zm <<= 3; 211 + 212 + if (ze > re) { 213 + /* 214 + * Have to shift y fraction right to align. 215 + */ 216 + s = ze - re; 217 + SPXSRSYn(s); 218 + } else if (re > ze) { 219 + /* 220 + * Have to shift x fraction right to align. 221 + */ 222 + s = re - ze; 223 + SPXSRSYn(s); 224 + } 225 + assert(ze == re); 226 + assert(ze <= SP_EMAX); 227 + 228 + if (zs == rs) { 229 + /* 230 + * Generate 28 bit result of adding two 27 bit numbers 231 + * leaving result in zm, zs and ze. 232 + */ 233 + zm = zm + rm; 234 + 235 + if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ 236 + SPXSRSX1(); /* shift preserving sticky */ 237 + } 238 + } else { 239 + if (zm >= rm) { 240 + zm = zm - rm; 241 + } else { 242 + zm = rm - zm; 243 + zs = rs; 244 + } 245 + if (zm == 0) 246 + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 247 + 248 + /* 249 + * Normalize in extended single precision 250 + */ 251 + while ((zm >> (SP_MBITS + 3)) == 0) { 252 + zm <<= 1; 253 + ze--; 254 + } 255 + 256 + } 257 + return ieee754sp_format(zs, ze, zm); 258 + }