From 4addd34b0477cf87787afbcea989b3391a81d0a1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 22 Jan 2018 17:29:48 +0800 Subject: [PATCH] nir: Recognize some more open-coded fmin / fmax MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This transformation is inexact because section 4.7.1 (Range and Precision) says: Operations and built-in functions that operate on a NaN are not required to return a NaN as the result. The fmin or fmax might not return NaN in cases where the original expression would be required to return NaN. v2: Reorder operands and mark as inexact. The latter suggested by Jason. shader-db results: Haswell, Broadwell, and Skylake had similar results. (Skylake shown) total instructions in shared programs: 14514817 -> 14514808 (<.01%) instructions in affected programs: 229 -> 220 (-3.93%) helped: 3 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 3.00 x̃: 4 helped stats (rel) min: 2.86% max: 4.12% x̄: 3.70% x̃: 4.12% total cycles in shared programs: 533145211 -> 533144939 (<.01%) cycles in affected programs: 37268 -> 36996 (-0.73%) helped: 8 HURT: 0 helped stats (abs) min: 2 max: 134 x̄: 34.00 x̃: 2 helped stats (rel) min: 0.02% max: 14.22% x̄: 3.53% x̃: 0.05% Sandy Bridge and Ivy Bridge had similar results. (Ivy Bridge shown) total cycles in shared programs: 257618409 -> 257618403 (<.01%) cycles in affected programs: 12582 -> 12576 (-0.05%) helped: 3 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.05% max: 0.05% x̄: 0.05% x̃: 0.05% No changes on Iron Lake or GM45. Signed-off-by: Ian Romanick Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir_opt_algebraic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 296f0671386..ef240ae0e91 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -170,6 +170,8 @@ optimizations = [ (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), + (('~bcsel', ('fge', a, b), b, a), ('fmin', a, b)), + (('~bcsel', ('fge', b, a), b, a), ('fmax', a, b)), (('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)), (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), (('bcsel', a, True, 'b@bool'), ('ior', a, b)), -- 2.30.2