From bf2eb3e0eee39e79f5426dfa18d9d3b7f9dfbcb2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 4 Mar 2020 23:00:24 -0800 Subject: [PATCH] soft-fp64: Split a block that was missing a cast on a comparison MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This function has code like: if (0x7FD <= zExp) { if ((0x7FD < zExp) || ((zExp == 0x7FD) && (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) && increment)) { ... return ...; } if (zExp < 0) { I saw that, and I thought, "Uh... what? Dead code?" I thought it was a bit fishy, so I grabbed the Berkeley SoftFloat Library 3e code, and there is similar code in softfloat_roundPackToF64 (source/s_roundPackToF64.c), but it has an extra (uint16_t) cast in the first comparison. This is basicially a shortcut for if (zExp < 0 || zExp >= 0x7FD) { So, having the nesting kind of makes sense. On a CPU, nesting the flow control can be an optimization. On a GPU, it's just fail. Split the block so that we don't need the uint16_t cast magic. Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS: Tiger Lake and Ice Lake had similar results. (Tiger Lake shown) total instructions in shared programs: 683638 -> 658127 (-3.73%) instructions in affected programs: 666839 -> 641328 (-3.83%) helped: 92 HURT: 0 helped stats (abs) min: 26 max: 2456 x̄: 277.29 x̃: 144 helped stats (rel) min: 3.21% max: 4.22% x̄: 3.79% x̃: 3.90% 95% mean confidence interval for instructions value: -345.84 -208.75 95% mean confidence interval for instructions %-change: -3.86% -3.73% Instructions are helped. total cycles in shared programs: 5458858 -> 5344600 (-2.09%) cycles in affected programs: 5360114 -> 5245856 (-2.13%) helped: 92 HURT: 0 helped stats (abs) min: 126 max: 10300 x̄: 1241.93 x̃: 655 helped stats (rel) min: 1.71% max: 2.37% x̄: 2.12% x̃: 2.17% 95% mean confidence interval for cycles value: -1539.93 -943.94 95% mean confidence interval for cycles %-change: -2.16% -2.08% Cycles are helped. Fixes: f111d72596c ("glsl: Add "built-in" functions to do add(fp64, fp64)") Reviewed-by: Matt Turner Tested-by: Marge Bot Part-of: --- src/compiler/glsl/float64.glsl | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index d41f0740bed..dd1179012ca 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -500,23 +500,25 @@ __roundAndPackFloat64(uint zSign, } return __packFloat64(zSign, 0x7FF, 0u, 0u); } - if (zExp < 0) { - __shift64ExtraRightJamming( - zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2); - zExp = 0; - if (roundNearestEven) { - increment = zFrac2 < 0u; + } + + if (zExp < 0) { + __shift64ExtraRightJamming( + zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2); + zExp = 0; + if (roundNearestEven) { + increment = zFrac2 < 0u; + } else { + if (zSign != 0u) { + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && + (zFrac2 != 0u); } else { - if (zSign != 0u) { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && - (zFrac2 != 0u); - } else { - increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && - (zFrac2 != 0u); - } + increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) && + (zFrac2 != 0u); } } } + if (increment) { __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1); zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven)); -- 2.30.2