From 96fcb3f95bdd53c8c1bdc243c95811acabd3f52c Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 11 Oct 2018 14:21:42 -0700 Subject: [PATCH] nir/algebraic: Use value range analysis to eliminate tautological compares not used by if-statements MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This just eliminates tautological / contradictory compares that are used for bcsel and other non-if-statement cases. If-statements are not affected because removing flow control can cause the i965 instruction scheduler to create some very long live ranges resulting in unnecessary spilling. This causes some shaders to fall off a performance cliff. Since many small if-statements are already flattened to bcsel, this optimization covers more than 68% of the possible cases (2417 shaders helped for instructions on Skylake vs. 3554). v2: Reorder and add whitespace to make the relationship between the patterns more obvious. Suggested by Caio. All Gen7+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 16333474 -> 16322028 (-0.07%) instructions in affected programs: 438559 -> 427113 (-2.61%) helped: 1765 HURT: 0 helped stats (abs) min: 1 max: 275 x̄: 6.48 x̃: 4 helped stats (rel) min: 0.20% max: 36.36% x̄: 4.07% x̃: 1.82% 95% mean confidence interval for instructions value: -6.87 -6.10 95% mean confidence interval for instructions %-change: -4.30% -3.84% Instructions are helped. total cycles in shared programs: 367608554 -> 367511103 (-0.03%) cycles in affected programs: 8368829 -> 8271378 (-1.16%) helped: 1541 HURT: 129 helped stats (abs) min: 1 max: 4468 x̄: 66.78 x̃: 39 helped stats (rel) min: 0.01% max: 45.69% x̄: 4.10% x̃: 2.17% HURT stats (abs) min: 1 max: 973 x̄: 42.25 x̃: 10 HURT stats (rel) min: 0.02% max: 64.39% x̄: 2.15% x̃: 0.60% 95% mean confidence interval for cycles value: -64.90 -51.81 95% mean confidence interval for cycles %-change: -3.89% -3.36% Cycles are helped. 
total spills in shared programs: 8867 -> 8868 (0.01%) spills in affected programs: 18 -> 19 (5.56%) helped: 0 HURT: 1 total fills in shared programs: 21900 -> 21903 (0.01%) fills in affected programs: 78 -> 81 (3.85%) helped: 0 HURT: 1 All Gen6 and earlier platforms had similar results. (Sandy Bridge shown) total instructions in shared programs: 10829877 -> 10829247 (<.01%) instructions in affected programs: 30240 -> 29610 (-2.08%) helped: 177 HURT: 0 helped stats (abs) min: 1 max: 15 x̄: 3.56 x̃: 3 helped stats (rel) min: 0.37% max: 17.39% x̄: 2.68% x̃: 1.94% 95% mean confidence interval for instructions value: -3.93 -3.18 95% mean confidence interval for instructions %-change: -3.04% -2.32% Instructions are helped. total cycles in shared programs: 154036580 -> 154035437 (<.01%) cycles in affected programs: 352402 -> 351259 (-0.32%) helped: 96 HURT: 28 helped stats (abs) min: 1 max: 128 x̄: 14.73 x̃: 6 helped stats (rel) min: 0.03% max: 24.00% x̄: 1.51% x̃: 0.46% HURT stats (abs) min: 1 max: 117 x̄: 9.68 x̃: 4 HURT stats (rel) min: 0.03% max: 2.24% x̄: 0.43% x̃: 0.23% 95% mean confidence interval for cycles value: -13.40 -5.03 95% mean confidence interval for cycles %-change: -1.62% -0.53% Cycles are helped. 
Reviewed-by: Caio Marcelo de Oliveira Filho --- src/compiler/nir/nir_opt_algebraic.py | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 29ef4eb13bf..d3f6c196e3b 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -798,6 +798,34 @@ optimizations = [ (('fmax', ('fadd', ('fmul', ('fsat', a), ('fneg', ('fsat', a))), 1.0), 0.0), ('fadd', ('fmul', ('fsat', a), ('fneg', ('fsat', a))), 1.0)), (('fmax', ('fadd', ('fneg', ('fmul', ('fsat', a), ('fsat', a))), 1.0), 0.0), ('fadd', ('fneg', ('fmul', ('fsat', a), ('fsat', a))), 1.0)), + (('fne(is_not_used_by_if)', 'a(is_not_zero)', 0.0), True), + (('feq(is_not_used_by_if)', 'a(is_not_zero)', 0.0), False), + + (('fge(is_not_used_by_if)', 'a(is_not_negative)', 'b(is_not_positive)'), True), + (('fge(is_not_used_by_if)', 'b(is_not_positive)', 'a(is_gt_zero)'), False), + (('fge(is_not_used_by_if)', 'a(is_lt_zero)', 'b(is_not_negative)'), False), + (('fge(is_not_used_by_if)', 'b(is_not_negative)', 'a(is_not_positive)'), True), + + (('flt(is_not_used_by_if)', 'a(is_not_negative)', 'b(is_not_positive)'), False), + (('flt(is_not_used_by_if)', 'b(is_not_positive)', 'a(is_gt_zero)'), True), + (('flt(is_not_used_by_if)', 'a(is_lt_zero)', 'b(is_not_negative)'), True), + (('flt(is_not_used_by_if)', 'b(is_not_negative)', 'a(is_not_positive)'), False), + + (('ine(is_not_used_by_if)', 'a(is_not_zero)', 0), True), + (('ieq(is_not_used_by_if)', 'a(is_not_zero)', 0), False), + + (('ige(is_not_used_by_if)', 'a(is_not_negative)', 'b(is_not_positive)'), True), + (('ige(is_not_used_by_if)', 'b(is_not_positive)', 'a(is_gt_zero)'), False), + (('ige(is_not_used_by_if)', 'a(is_lt_zero)', 'b(is_not_negative)'), False), + (('ige(is_not_used_by_if)', 'b(is_not_negative)', 'a(is_not_positive)'), True), + + (('ilt(is_not_used_by_if)', 'a(is_not_negative)', 'b(is_not_positive)'), False), + 
(('ilt(is_not_used_by_if)', 'b(is_not_positive)', 'a(is_gt_zero)'), True), + (('ilt(is_not_used_by_if)', 'a(is_lt_zero)', 'b(is_not_negative)'), True), + (('ilt(is_not_used_by_if)', 'b(is_not_negative)', 'a(is_not_positive)'), False), + + (('ult(is_not_used_by_if)', 0, 'a(is_gt_zero)'), True), + # Packing and then unpacking does nothing (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a), (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b), -- 2.30.2