From 403aac7500b1dbf439d8c50caf611bff536e7417 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 5 Sep 2018 20:45:19 -0700 Subject: [PATCH] nir/algebraic: Replace a pattern where iand with a Boolean is used as a bcsel MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit All of the affected shaders are in Mad Max. I noticed this while looking at some other things. I tried a couple similar patterns, but the effect on cycles was generally negative. It may be worth revisiting this later. v2: Rebase on 1-bit Boolean changes. All Gen7+ platforms had similar results. (Skylake shown) total instructions in shared programs: 15282073 -> 15282053 (<.01%) instructions in affected programs: 1192 -> 1172 (-1.68%) helped: 14 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.43 x̃: 1 helped stats (rel) min: 1.16% max: 2.17% x̄: 1.65% x̃: 1.39% 95% mean confidence interval for instructions value: -1.73 -1.13 95% mean confidence interval for instructions %-change: -1.91% -1.38% Instructions are helped. total cycles in shared programs: 372595954 -> 372594532 (<.01%) cycles in affected programs: 11477 -> 10055 (-12.39%) helped: 14 HURT: 0 helped stats (abs) min: 76 max: 122 x̄: 101.57 x̃: 104 helped stats (rel) min: 7.76% max: 15.62% x̄: 12.94% x̃: 14.78% 95% mean confidence interval for cycles value: -111.05 -92.09 95% mean confidence interval for cycles %-change: -14.90% -10.98% Cycles are helped. No changes on any Gen6 or earlier platforms. Reviewed-by: Matt Turner --- src/compiler/nir/nir_opt_algebraic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f275d70d276..23412aca029 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -35,6 +35,7 @@ a = 'a' b = 'b' c = 'c' d = 'd' +e = 'e' # Written in the form (, ) where is an expression # and is either an expression or a value. 
An expression is @@ -145,6 +146,9 @@ optimizations = [ (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), + (('~fmul', ('fadd', ('iand', ('ineg', ('b2i32', 'a@bool')), ('fmul', b, c)), '#d'), '#e'), + ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))), + (('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d)), (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)), (('fdot4', ('vec4', a, b, 0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)), -- 2.30.2