From bf4a6c99d242022e6ad42af68682609401ffcd73 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Fri, 22 Mar 2019 15:32:22 +1100 Subject: [PATCH] nir/gcm: allow derivative dependent intrinsics to be moved earlier We can't move them later as we could move them into non-uniform control flow, but moving them earlier should be fine. This helps avoid a bunch of spilling in unigine shaders due to moving the tex instructions' sources earlier (outside if branches) but not the instruction itself. Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/nir/nir_opt_gcm.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c index bdd1c7d2056..0d4c23ab631 100644 --- a/src/compiler/nir/nir_opt_gcm.c +++ b/src/compiler/nir/nir_opt_gcm.c @@ -54,10 +54,11 @@ struct gcm_instr_info { /* Flags used in the instr->pass_flags field for various instruction states */ enum { - GCM_INSTR_PINNED = (1 << 0), - GCM_INSTR_SCHEDULED_EARLY = (1 << 1), - GCM_INSTR_SCHEDULED_LATE = (1 << 2), - GCM_INSTR_PLACED = (1 << 3), + GCM_INSTR_PINNED = (1 << 0), + GCM_INSTR_SCHEDULE_EARLIER_ONLY = (1 << 1), + GCM_INSTR_SCHEDULED_EARLY = (1 << 2), + GCM_INSTR_SCHEDULED_LATE = (1 << 3), + GCM_INSTR_PLACED = (1 << 4), }; struct gcm_state { @@ -133,8 +134,8 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state) case nir_op_fddy_fine: case nir_op_fddx_coarse: case nir_op_fddy_coarse: - /* These can only go in uniform control flow; pin them for now */ - instr->pass_flags = GCM_INSTR_PINNED; + /* These can only go in uniform control flow */ + instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY; break; default: @@ -145,7 +146,7 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state) case nir_instr_type_tex: if (nir_tex_instr_has_implicit_derivative(nir_instr_as_tex(instr))) - instr->pass_flags = GCM_INSTR_PINNED; + instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY; break; case 
nir_instr_type_deref: @@ -353,6 +354,12 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state) return true; } + if (def->parent_instr->pass_flags & GCM_INSTR_SCHEDULE_EARLIER_ONLY && + lca != def->parent_instr->block && + nir_block_dominates(def->parent_instr->block, lca)) { + lca = def->parent_instr->block; + } + /* We now have the LCA of all of the uses. If our invariants hold, * this is dominated by the block that we chose when scheduling early. * We now walk up the dominance tree and pick the lowest block that is -- 2.30.2