From 1546204cdd45476a2cc4ff4adcc742505366398f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 10 Sep 2018 10:39:42 -0700 Subject: [PATCH] nir/algebraic: sign(x)*x*x is abs(x)*x MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit shader-db results: All Gen7+ platforms had similar results. (Skylake shown) total instructions in shared programs: 15106023 -> 15105981 (<.01%) instructions in affected programs: 300 -> 258 (-14.00%) helped: 6 HURT: 0 helped stats (abs) min: 7 max: 7 x̄: 7.00 x̃: 7 helped stats (rel) min: 14.00% max: 14.00% x̄: 14.00% x̃: 14.00% 95% mean confidence interval for instructions value: -7.00 -7.00 95% mean confidence interval for instructions %-change: -14.00% -14.00% Instructions are helped. total cycles in shared programs: 566050327 -> 566050075 (<.01%) cycles in affected programs: 2826 -> 2574 (-8.92%) helped: 6 HURT: 0 helped stats (abs) min: 40 max: 44 x̄: 42.00 x̃: 42 helped stats (rel) min: 8.89% max: 8.94% x̄: 8.92% x̃: 8.92% 95% mean confidence interval for cycles value: -44.30 -39.70 95% mean confidence interval for cycles %-change: -8.95% -8.88% Cycles are helped. No changes on Gen6 or earlier. Signed-off-by: Ian Romanick Reviewed-by: Thomas Helland --- src/compiler/nir/nir_opt_algebraic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index ae1261f8744..3267e93a583 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -105,6 +105,11 @@ optimizations = [ (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), + # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a + # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a + # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0 + (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)), + (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)), (('~ffma', 0.0, a, b), b), (('~ffma', a, 0.0, b), b), (('~ffma', a, b, 0.0), ('fmul', a, b)), -- 2.30.2