From 92f70df8c38a36d913334c596ce26af64b6c569b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 31 Aug 2019 11:40:32 -0700 Subject: [PATCH] nir/algebraic: Do not apply late DPH optimization in vertex processing stages MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Some shaders do not use 'invariant' in vertex and (possibly) geometry shader stages on some outputs that are intended to be invariant. For various reasons, this optimization may not be fully applied in all shaders used for different rendering passes of the same geometry. This can result in Z-fighting artifacts (at best). For now, disable this optimization in these stages. In tessellation stages applications seem to use 'precise' when necessary, so allow the optimization in those stages. Reviewed-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111490 Fixes: 09705747d72 ("nir/algebraic: Reassociate fadd into fmul in DPH-like pattern") All Gen8+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 16194726 -> 16344745 (0.93%) instructions in affected programs: 2855172 -> 3005191 (5.25%) helped: 6 HURT: 20279 helped stats (abs) min: 1 max: 3 x̄: 1.33 x̃: 1 helped stats (rel) min: 0.44% max: 1.00% x̄: 0.54% x̃: 0.44% HURT stats (abs) min: 1 max: 32 x̄: 7.40 x̃: 7 HURT stats (rel) min: 0.14% max: 42.86% x̄: 8.58% x̃: 6.56% 95% mean confidence interval for instructions value: 7.34 7.45 95% mean confidence interval for instructions %-change: 8.48% 8.67% Instructions are HURT. total cycles in shared programs: 364471296 -> 365014683 (0.15%) cycles in affected programs: 32421530 -> 32964917 (1.68%) helped: 2925 HURT: 16144 helped stats (abs) min: 1 max: 403 x̄: 18.39 x̃: 5 helped stats (rel) min: <.01% max: 22.61% x̄: 1.97% x̃: 1.15% HURT stats (abs) min: 1 max: 18471 x̄: 36.99 x̃: 15 HURT stats (rel) min: 0.02% max: 52.58% x̄: 5.60% x̃: 3.87% 95% mean confidence interval for cycles value: 21.58 35.41 95% mean confidence interval for cycles %-change: 4.36% 4.52% Cycles are HURT. --- src/compiler/nir/nir_opt_algebraic.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index e3b503f3825..ed050b6c083 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1537,10 +1537,19 @@ late_optimizations = [ # fadd(ffma(v1.z, v2.z, ffma(v1.y, v2.y, fmul(v1.x, v2.x))), v1.w) # # Reassociate the last addition into the first multiplication. + # + # Some shaders do not use 'invariant' in vertex and (possibly) geometry + # shader stages on some outputs that are intended to be invariant. For + # various reasons, this optimization may not be fully applied in all + # shaders used for different rendering passes of the same geometry. This + # can result in Z-fighting artifacts (at best). For now, disable this + # optimization in these stages. See bugzilla #111490. In tessellation + # stages applications seem to use 'precise' when necessary, so allow the + # optimization in those stages. (('~fadd', ('ffma(is_used_once)', a, b, ('ffma', c, d, ('fmul', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)'))), 'g(is_not_const)'), - ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '!options->intel_vec4'), - (('~fadd', ('ffma(is_used_once)', a, b, ('fmul', 'e(is_not_const_and_not_fsign)', 'f(is_not_const_and_not_fsign)') ), 'g(is_not_const)'), - ('ffma', a, b, ('ffma', e, 'f', 'g') ), '!options->intel_vec4'), + ('ffma', a, b, ('ffma', c, d, ('ffma', e, 'f', 'g'))), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), + (('~fadd', ('ffma(is_used_once)', a, b, ('fmul', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), + ('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), ] print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()) -- 2.30.2