nir/gcm: allow derivative-dependent intrinsics to be moved earlier
author Timothy Arceri <tarceri@itsqueeze.com>
Fri, 22 Mar 2019 04:32:22 +0000 (15:32 +1100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 20 Apr 2020 03:46:29 +0000 (03:46 +0000)
We can't move them later, since that could move them into non-uniform
control flow, but moving them earlier should be fine.

This helps avoid a bunch of spilling in Unigine shaders caused by
moving the tex instruction's sources earlier (outside if branches)
but not the instruction itself.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4636>

src/compiler/nir/nir_opt_gcm.c

index bdd1c7d205635c195c6fdea4e42e5422ccc98619..0d4c23ab63136045583ef23bb73671069e3dfea3 100644 (file)
@@ -54,10 +54,11 @@ struct gcm_instr_info {
 
 /* Flags used in the instr->pass_flags field for various instruction states */
 enum {
-   GCM_INSTR_PINNED =            (1 << 0),
-   GCM_INSTR_SCHEDULED_EARLY =   (1 << 1),
-   GCM_INSTR_SCHEDULED_LATE =    (1 << 2),
-   GCM_INSTR_PLACED =            (1 << 3),
+   GCM_INSTR_PINNED =                (1 << 0),
+   GCM_INSTR_SCHEDULE_EARLIER_ONLY = (1 << 1),
+   GCM_INSTR_SCHEDULED_EARLY =       (1 << 2),
+   GCM_INSTR_SCHEDULED_LATE =        (1 << 3),
+   GCM_INSTR_PLACED =                (1 << 4),
 };
 
 struct gcm_state {
@@ -133,8 +134,8 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state)
             case nir_op_fddy_fine:
             case nir_op_fddx_coarse:
             case nir_op_fddy_coarse:
-               /* These can only go in uniform control flow; pin them for now */
-               instr->pass_flags = GCM_INSTR_PINNED;
+               /* These can only go in uniform control flow */
+               instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY;
                break;
 
             default:
@@ -145,7 +146,7 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state)
 
          case nir_instr_type_tex:
             if (nir_tex_instr_has_implicit_derivative(nir_instr_as_tex(instr)))
-               instr->pass_flags = GCM_INSTR_PINNED;
+               instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY;
             break;
 
          case nir_instr_type_deref:
@@ -353,6 +354,12 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
       return true;
    }
 
+   if (def->parent_instr->pass_flags & GCM_INSTR_SCHEDULE_EARLIER_ONLY &&
+       lca != def->parent_instr->block &&
+       nir_block_dominates(def->parent_instr->block, lca)) {
+      lca = def->parent_instr->block;
+   }
+
    /* We now have the LCA of all of the uses.  If our invariants hold,
     * this is dominated by the block that we chose when scheduling early.
     * We now walk up the dominance tree and pick the lowest block that is