mesa WIP: patch bad rounding (was OK on my HW, but not on hydra)

+358
+1
pkgs/development/libraries/mesa/default.nix
··· 39 39 patches = [ 40 40 ./static-gallium.patch 41 41 ./dricore-gallium.patch 42 + ./fix-rounding.patch 42 43 ]; 43 44 44 45 # Change the search path for EGL drivers from $drivers/* to driverLink
+357
pkgs/development/libraries/mesa/fix-rounding.patch
··· 1 + From c25ae5d27b114e23d5734f846002df1a05759658 Mon Sep 17 00:00:00 2001 2 + From: Roland Scheidegger <sroland@vmware.com> 3 + Date: Thu, 31 Jan 2013 19:27:49 +0000 4 + Subject: gallivm: fix issues with trunc/round/floor/ceil with no arch rounding 5 + 6 + The emulation of these if there's no rounding instruction available 7 + is a bit more complicated than what the code did. 8 + In particular, doing fp-to-int/int-to-fp will not work if the exponent 9 + is large enough (and with NaNs, Infs). Hence such values need to be filtered 10 + out and the original value returned in this case (which fortunately should 11 + always be exact). This comes at the expense of performance (if your cpu 12 + doesn't support rounding instructions). 13 + Furthermore, floor/ifloor/ceil/iceil were affected by precision issues for 14 + values near negative (for floor) or positive (for ceil) zero, fix that as well 15 + (fixing this issue might not actually be slower except for ceil/iceil if the 16 + type is not signed which is probably rare - note iceil has no callers left 17 + in any case). 18 + 19 + Also add some new rounding test values in lp_test_arit to actually test 20 + for that stuff (which previously would have failed without sse41). 21 + 22 + This fixes https://bugs.freedesktop.org/show_bug.cgi?id=59701. 23 + --- 24 + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c 25 + index b4e9f23..ec05026 100644 26 + --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 27 + +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c 28 + @@ -1590,12 +1590,37 @@ lp_build_trunc(struct lp_build_context *bld, 29 + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_TRUNCATE); 30 + } 31 + else { 32 + - LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 33 + - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 34 + - LLVMValueRef res; 35 + - res = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 36 + - res = LLVMBuildSIToFP(builder, res, vec_type, ""); 37 + - return res; 38 + + const struct lp_type type = bld->type; 39 + + struct lp_type inttype; 40 + + struct lp_build_context intbld; 41 + + LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); 42 + + LLVMValueRef trunc, res, anosign, mask; 43 + + LLVMTypeRef int_vec_type = bld->int_vec_type; 44 + + LLVMTypeRef vec_type = bld->vec_type; 45 + + 46 + + assert(type.width == 32); /* might want to handle doubles at some point */ 47 + + 48 + + inttype = type; 49 + + inttype.floating = 0; 50 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 51 + + 52 + + /* round by truncation */ 53 + + trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 54 + + res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc"); 55 + + 56 + + /* mask out sign bit */ 57 + + anosign = lp_build_abs(bld, a); 58 + + /* 59 + + * mask out all values if anosign > 2^24 60 + + * This should work both for large ints (all rounding is no-op for them 61 + + * because such floats are always exact) as well as special cases like 62 + + * NaNs, Infs (taking advantage of the fact they use max exponent). 63 + + * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) 64 + + */ 65 + + anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); 66 + + cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); 67 + + mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); 68 + + return lp_build_select(bld, mask, a, res); 69 + } 70 + } 71 + 72 + @@ -1620,11 +1645,36 @@ lp_build_round(struct lp_build_context *bld, 73 + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST); 74 + } 75 + else { 76 + - LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 77 + - LLVMValueRef res; 78 + + const struct lp_type type = bld->type; 79 + + struct lp_type inttype; 80 + + struct lp_build_context intbld; 81 + + LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); 82 + + LLVMValueRef res, anosign, mask; 83 + + LLVMTypeRef int_vec_type = bld->int_vec_type; 84 + + LLVMTypeRef vec_type = bld->vec_type; 85 + + 86 + + assert(type.width == 32); /* might want to handle doubles at some point */ 87 + + 88 + + inttype = type; 89 + + inttype.floating = 0; 90 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 91 + + 92 + res = lp_build_iround(bld, a); 93 + res = LLVMBuildSIToFP(builder, res, vec_type, ""); 94 + - return res; 95 + + 96 + + /* mask out sign bit */ 97 + + anosign = lp_build_abs(bld, a); 98 + + /* 99 + + * mask out all values if anosign > 2^24 100 + + * This should work both for large ints (all rounding is no-op for them 101 + + * because such floats are always exact) as well as special cases like 102 + + * NaNs, Infs (taking advantage of the fact they use max exponent). 103 + + * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) 104 + + */ 105 + + anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); 106 + + cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); 107 + + mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); 108 + + return lp_build_select(bld, mask, a, res); 109 + } 110 + } 111 + 112 + @@ -1648,11 +1698,52 @@ lp_build_floor(struct lp_build_context *bld, 113 + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR); 114 + } 115 + else { 116 + - LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 117 + - LLVMValueRef res; 118 + - res = lp_build_ifloor(bld, a); 119 + - res = LLVMBuildSIToFP(builder, res, vec_type, ""); 120 + - return res; 121 + + const struct lp_type type = bld->type; 122 + + struct lp_type inttype; 123 + + struct lp_build_context intbld; 124 + + LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); 125 + + LLVMValueRef trunc, res, anosign, mask; 126 + + LLVMTypeRef int_vec_type = bld->int_vec_type; 127 + + LLVMTypeRef vec_type = bld->vec_type; 128 + + 129 + + assert(type.width == 32); /* might want to handle doubles at some point */ 130 + + 131 + + inttype = type; 132 + + inttype.floating = 0; 133 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 134 + + 135 + + /* round by truncation */ 136 + + trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 137 + + res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc"); 138 + + 139 + + if (type.sign) { 140 + + LLVMValueRef tmp; 141 + + 142 + + /* 143 + + * fix values if rounding is wrong (for non-special cases) 144 + + * - this is the case if trunc > a 145 + + */ 146 + + mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, res, a); 147 + + /* tmp = trunc > a ? 1.0 : 0.0 */ 148 + + tmp = LLVMBuildBitCast(builder, bld->one, int_vec_type, ""); 149 + + tmp = lp_build_and(&intbld, mask, tmp); 150 + + tmp = LLVMBuildBitCast(builder, tmp, vec_type, ""); 151 + + res = lp_build_sub(bld, res, tmp); 152 + + } 153 + + 154 + + /* mask out sign bit */ 155 + + anosign = lp_build_abs(bld, a); 156 + + /* 157 + + * mask out all values if anosign > 2^24 158 + + * This should work both for large ints (all rounding is no-op for them 159 + + * because such floats are always exact) as well as special cases like 160 + + * NaNs, Infs (taking advantage of the fact they use max exponent). 161 + + * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) 162 + + */ 163 + + anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); 164 + + cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); 165 + + mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); 166 + + return lp_build_select(bld, mask, a, res); 167 + } 168 + } 169 + 170 + @@ -1676,11 +1767,48 @@ lp_build_ceil(struct lp_build_context *bld, 171 + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_CEIL); 172 + } 173 + else { 174 + - LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 175 + - LLVMValueRef res; 176 + - res = lp_build_iceil(bld, a); 177 + - res = LLVMBuildSIToFP(builder, res, vec_type, ""); 178 + - return res; 179 + + const struct lp_type type = bld->type; 180 + + struct lp_type inttype; 181 + + struct lp_build_context intbld; 182 + + LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); 183 + + LLVMValueRef trunc, res, anosign, mask, tmp; 184 + + LLVMTypeRef int_vec_type = bld->int_vec_type; 185 + + LLVMTypeRef vec_type = bld->vec_type; 186 + + 187 + + assert(type.width == 32); /* might want to handle doubles at some point */ 188 + + 189 + + inttype = type; 190 + + inttype.floating = 0; 191 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 192 + + 193 + + /* round by truncation */ 194 + + trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 195 + + trunc = LLVMBuildSIToFP(builder, trunc, vec_type, "ceil.trunc"); 196 + + 197 + + /* 198 + + * fix values if rounding is wrong (for non-special cases) 199 + + * - this is the case if trunc < a 200 + + */ 201 + + mask = lp_build_cmp(bld, PIPE_FUNC_LESS, trunc, a); 202 + + /* tmp = trunc < a ? 1.0 : 0.0 */ 203 + + tmp = LLVMBuildBitCast(builder, bld->one, int_vec_type, ""); 204 + + tmp = lp_build_and(&intbld, mask, tmp); 205 + + tmp = LLVMBuildBitCast(builder, tmp, vec_type, ""); 206 + + res = lp_build_add(bld, trunc, tmp); 207 + + 208 + + /* mask out sign bit */ 209 + + anosign = lp_build_abs(bld, a); 210 + + /* 211 + + * mask out all values if anosign > 2^24 212 + + * This should work both for large ints (all rounding is no-op for them 213 + + * because such floats are always exact) as well as special cases like 214 + + * NaNs, Infs (taking advantage of the fact they use max exponent). 215 + + * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) 216 + + */ 217 + + anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); 218 + + cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); 219 + + mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); 220 + + return lp_build_select(bld, mask, a, res); 221 + } 222 + } 223 + 224 + @@ -1826,32 +1954,30 @@ lp_build_ifloor(struct lp_build_context *bld, 225 + res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR); 226 + } 227 + else { 228 + - /* Take the sign bit and add it to 1 constant */ 229 + - LLVMTypeRef vec_type = bld->vec_type; 230 + - unsigned mantissa = lp_mantissa(type); 231 + - LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, 232 + - (unsigned long long)1 << (type.width - 1)); 233 + - LLVMValueRef sign; 234 + - LLVMValueRef offset; 235 + + struct lp_type inttype; 236 + + struct lp_build_context intbld; 237 + + LLVMValueRef trunc, itrunc, mask; 238 + 239 + - /* sign = a < 0 ? ~0 : 0 */ 240 + - sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); 241 + - sign = LLVMBuildAnd(builder, sign, mask, ""); 242 + - sign = LLVMBuildAShr(builder, sign, 243 + - lp_build_const_int_vec(bld->gallivm, type, 244 + - type.width - 1), 245 + - "ifloor.sign"); 246 + + assert(type.floating); 247 + + assert(lp_check_value(type, a)); 248 + 249 + - /* offset = -0.99999(9)f */ 250 + - offset = lp_build_const_vec(bld->gallivm, type, 251 + - -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); 252 + - offset = LLVMConstBitCast(offset, int_vec_type); 253 + + inttype = type; 254 + + inttype.floating = 0; 255 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 256 + 257 + - /* offset = a < 0 ? offset : 0.0f */ 258 + - offset = LLVMBuildAnd(builder, offset, sign, ""); 259 + - offset = LLVMBuildBitCast(builder, offset, vec_type, "ifloor.offset"); 260 + + /* round by truncation */ 261 + + itrunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 262 + + trunc = LLVMBuildSIToFP(builder, itrunc, bld->vec_type, "ifloor.trunc"); 263 + 264 + - res = LLVMBuildFAdd(builder, res, offset, "ifloor.res"); 265 + + /* 266 + + * fix values if rounding is wrong (for non-special cases) 267 + + * - this is the case if trunc > a 268 + + * The results of doing this with NaNs, very large values etc. 269 + + * are undefined but this seems to be the case anyway. 270 + + */ 271 + + mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, trunc, a); 272 + + /* cheapie minus one with mask since the mask is minus one / zero */ 273 + + return lp_build_add(&intbld, itrunc, mask); 274 + } 275 + } 276 + 277 + @@ -1883,35 +2009,30 @@ lp_build_iceil(struct lp_build_context *bld, 278 + res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_CEIL); 279 + } 280 + else { 281 + - LLVMTypeRef vec_type = bld->vec_type; 282 + - unsigned mantissa = lp_mantissa(type); 283 + - LLVMValueRef offset; 284 + + struct lp_type inttype; 285 + + struct lp_build_context intbld; 286 + + LLVMValueRef trunc, itrunc, mask; 287 + 288 + - /* offset = 0.99999(9)f */ 289 + - offset = lp_build_const_vec(bld->gallivm, type, 290 + - (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); 291 + + assert(type.floating); 292 + + assert(lp_check_value(type, a)); 293 + 294 + - if (type.sign) { 295 + - LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, 296 + - (unsigned long long)1 << (type.width - 1)); 297 + - LLVMValueRef sign; 298 + + inttype = type; 299 + + inttype.floating = 0; 300 + + lp_build_context_init(&intbld, bld->gallivm, inttype); 301 + 302 + - /* sign = a < 0 ? 0 : ~0 */ 303 + - sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); 304 + - sign = LLVMBuildAnd(builder, sign, mask, ""); 305 + - sign = LLVMBuildAShr(builder, sign, 306 + - lp_build_const_int_vec(bld->gallivm, type, 307 + - type.width - 1), 308 + - "iceil.sign"); 309 + - sign = LLVMBuildNot(builder, sign, "iceil.not"); 310 + - 311 + - /* offset = a < 0 ? 0.0 : offset */ 312 + - offset = LLVMConstBitCast(offset, int_vec_type); 313 + - offset = LLVMBuildAnd(builder, offset, sign, ""); 314 + - offset = LLVMBuildBitCast(builder, offset, vec_type, "iceil.offset"); 315 + - } 316 + + /* round by truncation */ 317 + + itrunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); 318 + + trunc = LLVMBuildSIToFP(builder, itrunc, bld->vec_type, "iceil.trunc"); 319 + 320 + - res = LLVMBuildFAdd(builder, a, offset, "iceil.res"); 321 + + /* 322 + + * fix values if rounding is wrong (for non-special cases) 323 + + * - this is the case if trunc < a 324 + + * The results of doing this with NaNs, very large values etc. 325 + + * are undefined but this seems to be the case anyway. 326 + + */ 327 + + mask = lp_build_cmp(bld, PIPE_FUNC_LESS, trunc, a); 328 + + /* cheapie plus one with mask since the mask is minus one / zero */ 329 + + return lp_build_sub(&intbld, itrunc, mask); 330 + } 331 + 332 + /* round to nearest (toward zero) */ 333 + diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c 334 + index 99928b8..f14e4b3 100644 335 + --- a/src/gallium/drivers/llvmpipe/lp_test_arit.c 336 + +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c 337 + @@ -207,6 +207,18 @@ const float round_values[] = { 338 + -10.0, -1, 0.0, 12.0, 339 + -1.49, -0.25, 1.25, 2.51, 340 + -0.99, -0.01, 0.01, 0.99, 341 + + 1.401298464324817e-45f, // smallest denormal 342 + + -1.401298464324817e-45f, 343 + + 1.62981451e-08f, 344 + + -1.62981451e-08f, 345 + + 1.62981451e15f, // large number not representable as 32bit int 346 + + -1.62981451e15f, 347 + + FLT_EPSILON, 348 + + -FLT_EPSILON, 349 + + 1.0f - 0.5f*FLT_EPSILON, 350 + + -1.0f + FLT_EPSILON, 351 + + FLT_MAX, 352 + + -FLT_MAX 353 + }; 354 + 355 + static float fractf(float x) 356 + -- 357 + cgit v0.9.0.2-2-gbebe