From abf28d6a70c3219e41c904806f77ea92d31bdb0f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 2 Mar 2020 19:20:42 -0800 Subject: [PATCH] soft-fp64: Relax the way NaN is propagated MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Also reassociate a couple expressions to encourage some CSE. Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS: Tiger Lake and Ice Lake had similar results. (Tiger Lake shown) total instructions in shared programs: 813599 -> 797951 (-1.92%) instructions in affected programs: 796110 -> 780462 (-1.97%) helped: 92 HURT: 0 helped stats (abs) min: 3 max: 5198 x̄: 170.09 x̃: 83 helped stats (rel) min: 0.36% max: 5.50% x̄: 1.57% x̃: 1.40% 95% mean confidence interval for instructions value: -282.42 -57.75 95% mean confidence interval for instructions %-change: -1.71% -1.42% Instructions are helped. total cycles in shared programs: 6687128 -> 6601437 (-1.28%) cycles in affected programs: 6582246 -> 6496555 (-1.30%) helped: 92 HURT: 0 helped stats (abs) min: 36 max: 14442 x̄: 931.42 x̃: 592 helped stats (rel) min: 0.45% max: 3.16% x̄: 1.44% x̃: 1.23% 95% mean confidence interval for cycles value: -1257.58 -605.27 95% mean confidence interval for cycles %-change: -1.58% -1.30% Cycles are helped. total spills in shared programs: 759 -> 702 (-7.51%) spills in affected programs: 759 -> 702 (-7.51%) helped: 3 HURT: 0 total fills in shared programs: 2412 -> 1442 (-40.22%) fills in affected programs: 2412 -> 1442 (-40.22%) helped: 3 HURT: 0 Reviewed-by: Matt Turner Part-of: --- src/compiler/glsl/float64.glsl | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index c83e1aa8c97..5b0a9dc0c28 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -59,6 +59,11 @@ #define FLOAT_ROUND_UP 3 #define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN +/* Relax propagation of NaN. Binary operations with a NaN source will still + * produce a NaN result, but it won't follow strict IEEE rules. + */ +#define RELAXED_NAN_PROPAGATION + /* Absolute value of a Float64 : * Clear the sign bit */ @@ -639,6 +644,12 @@ __normalizeRoundAndPackFloat64(uint zSign, uint64_t __propagateFloat64NaN(uint64_t __a, uint64_t __b) { +#if defined RELAXED_NAN_PROPAGATION + uvec2 a = unpackUint2x32(__a); + uvec2 b = unpackUint2x32(__b); + + return packUint2x32(uvec2(a.x | b.x, a.y | b.y)); +#else bool aIsNaN = __is_nan(__a); bool bIsNaN = __is_nan(__b); uvec2 a = unpackUint2x32(__a); @@ -647,6 +658,7 @@ __propagateFloat64NaN(uint64_t __a, uint64_t __b) b.y |= 0x00080000u; return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN))); +#endif } /* Returns the result of adding the double-precision floating-point values @@ -674,7 +686,7 @@ __fadd64(uint64_t a, uint64_t b) if (orig_exp_diff_is_zero) { if (aExp == 0x7FF) { - bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u; + bool propagate = ((aFracHi | bFracHi) | (aFracLo| bFracLo)) != 0u; return mix(a, __propagateFloat64NaN(a, b), propagate); } __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1); @@ -753,7 +765,7 @@ __fadd64(uint64_t a, uint64_t b) return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1); } if (aExp == 0x7FF) { - bool propagate = (aFracHi | aFracLo | bFracHi | bFracLo) != 0u; + bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u; return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate); } bExp = mix(bExp, 1, aExp == 0); @@ -879,8 +891,13 @@ __fmul64(uint64_t a, uint64_t b) return __packFloat64(zSign, 0x7FF, 0u, 0u); } if (bExp == 0x7FF) { + /* a cannot be NaN, but is b NaN? */ if ((bFracHi | bFracLo) != 0u) +#if defined RELAXED_NAN_PROPAGATION + return b; +#else return __propagateFloat64NaN(a, b); +#endif if ((uint(aExp) | aFracHi | aFracLo) == 0u) return 0xFFFFFFFFFFFFFFFFUL; return __packFloat64(zSign, 0x7FF, 0u, 0u); -- 2.30.2