From 73fa3a1ca44a5eb7bf1c4c5087fcacd912b62e65 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 3 Mar 2020 19:38:13 -0800
Subject: [PATCH] soft-fp64/fadd: Instead of tracking "b < a", track sign of
 the difference
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 824403 -> 822766 (-0.20%)
instructions in affected programs: 756260 -> 754623 (-0.22%)
helped: 68
HURT: 1
helped stats (abs) min: 1 max: 118 x̄: 26.26 x̃: 18
helped stats (rel) min: 0.02% max: 0.97% x̄: 0.31% x̃: 0.23%
HURT stats (abs)   min: 149 max: 149 x̄: 149.00 x̃: 149
HURT stats (rel)   min: 0.17% max: 0.17% x̄: 0.17% x̃: 0.17%
95% mean confidence interval for instructions value: -31.94 -15.51
95% mean confidence interval for instructions %-change: -0.37% -0.23%
Instructions are helped.

total cycles in shared programs: 6828935 -> 6816791 (-0.18%)
cycles in affected programs: 6385191 -> 6373047 (-0.19%)
helped: 73
HURT: 0
helped stats (abs) min: 2 max: 852 x̄: 166.36 x̃: 120
helped stats (rel) min: <.01% max: 0.80% x̄: 0.22% x̃: 0.17%
95% mean confidence interval for cycles value: -210.80 -121.91
95% mean confidence interval for cycles %-change: -0.27% -0.17%
Cycles are helped.

total fills in shared programs: 1442 -> 1497 (3.81%)
fills in affected programs: 1442 -> 1497 (3.81%)
helped: 0
HURT: 1

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
---
 src/compiler/glsl/float64.glsl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl
index 3cc8aa7fa4b..46cae0a4cdb 100644
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -771,14 +771,14 @@ __fadd64(uint64_t a, uint64_t b)
       bExp = mix(bExp, 1, aExp == 0);
       aExp = mix(aExp, 1, aExp == 0);
       bool zexp_normal = false;
-      bool blta = true;
+      uint sign_of_difference = 0;
       if (bFracHi < aFracHi) {
          __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
          zexp_normal = true;
       }
       else if (aFracHi < bFracHi) {
          __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
-         blta = false;
+         sign_of_difference = 0x80000000;
          zexp_normal = true;
       }
       else if (bFracLo < aFracLo) {
@@ -787,11 +787,11 @@ __fadd64(uint64_t a, uint64_t b)
       }
       else if (aFracLo < bFracLo) {
          __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
-          blta = false;
+         sign_of_difference = 0x80000000;
           zexp_normal = true;
       }
-      zExp = mix(bExp, aExp, blta);
-      aSign = mix(aSign ^ 0x80000000u, aSign, blta);
+      zExp = mix(bExp, aExp, sign_of_difference == 0u);
+      aSign ^= sign_of_difference;
       uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
       uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
       return mix(retval_0, retval_1, zexp_normal);
-- 
2.30.2