nir/opcodes: Add new 'umul_low' and 'imadsh_mix16' opcodes
[mesa.git] / src / compiler / nir / nir_lower_phis_to_scalar.c
index 9fd00cc784aad8233d6d9b931331ef9fdb51600f..bea9dc407534b5d4ff1783a07232c521c77a3070 100644 (file)
@@ -75,24 +75,35 @@ is_phi_src_scalarizable(nir_phi_src *src,
       return should_lower_phi(nir_instr_as_phi(src_instr), state);
 
    case nir_instr_type_load_const:
-   case nir_instr_type_ssa_undef:
       /* These are trivially scalarizable */
       return true;
 
+   case nir_instr_type_ssa_undef:
+      /* The caller of this function is going to OR the results and we don't
+       * want undefs to count so we return false.
+       */
+      return false;
+
    case nir_instr_type_intrinsic: {
       nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);
 
       switch (src_intrin->intrinsic) {
-      case nir_intrinsic_load_var:
-         return src_intrin->variables[0]->var->data.mode == nir_var_shader_in ||
-                src_intrin->variables[0]->var->data.mode == nir_var_uniform;
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
+         return deref->mode == nir_var_shader_in ||
+                deref->mode == nir_var_uniform ||
+                deref->mode == nir_var_mem_ubo ||
+                deref->mode == nir_var_mem_ssbo ||
+                deref->mode == nir_var_mem_global;
+      }
 
-      case nir_intrinsic_interp_var_at_centroid:
-      case nir_intrinsic_interp_var_at_sample:
-      case nir_intrinsic_interp_var_at_offset:
+      case nir_intrinsic_interp_deref_at_centroid:
+      case nir_intrinsic_interp_deref_at_sample:
+      case nir_intrinsic_interp_deref_at_offset:
       case nir_intrinsic_load_uniform:
       case nir_intrinsic_load_ubo:
       case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_global:
       case nir_intrinsic_load_input:
          return true;
       default:
@@ -144,11 +155,16 @@ should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
     */
    entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
 
-   bool scalarizable = true;
+   bool scalarizable = false;
 
    nir_foreach_phi_src(src, phi) {
+      /* This loop ignores srcs that are not scalarizable because its likely
+       * still worth copying to temps if another phi source is scalarizable.
+       * This reduces register spilling by a huge amount in the i965 driver for
+       * Deus Ex: MD.
+       */
       scalarizable = is_phi_src_scalarizable(src, state);
-      if (!scalarizable)
+      if (scalarizable)
          break;
    }
 
@@ -166,6 +182,8 @@ static bool
 lower_phis_to_scalar_block(nir_block *block,
                            struct lower_phis_to_scalar_state *state)
 {
+   bool progress = false;
+
    /* Find the last phi node in the block */
    nir_phi_instr *last_phi = NULL;
    nir_foreach_instr(instr, block) {
@@ -217,7 +235,7 @@ lower_phis_to_scalar_block(nir_block *block,
          nir_foreach_phi_src(src, phi) {
             /* We need to insert a mov to grab the i'th component of src */
             nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
-                                                      nir_op_imov);
+                                                      nir_op_mov);
             nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
             mov->dest.write_mask = 1;
             nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
@@ -248,6 +266,8 @@ lower_phis_to_scalar_block(nir_block *block,
       ralloc_steal(state->dead_ctx, phi);
       nir_instr_remove(&phi->instr);
 
+      progress = true;
+
       /* We're using the safe iterator and inserting all the newly
        * scalarized phi nodes before their non-scalarized version so that's
        * ok.  However, we are also inserting vec operations after all of
@@ -258,27 +278,28 @@ lower_phis_to_scalar_block(nir_block *block,
          break;
    }
 
-   return true;
+   return progress;
 }
 
-static void
+static bool
 lower_phis_to_scalar_impl(nir_function_impl *impl)
 {
    struct lower_phis_to_scalar_state state;
+   bool progress = false;
 
    state.mem_ctx = ralloc_parent(impl);
    state.dead_ctx = ralloc_context(NULL);
-   state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
-                                             _mesa_key_pointer_equal);
+   state.phi_table = _mesa_pointer_hash_table_create(state.dead_ctx);
 
    nir_foreach_block(block, impl) {
-      lower_phis_to_scalar_block(block, &state);
+      progress = lower_phis_to_scalar_block(block, &state) || progress;
    }
 
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
 
    ralloc_free(state.dead_ctx);
+   return progress;
 }
 
 /** A pass that lowers vector phi nodes to scalar
@@ -288,11 +309,15 @@ lower_phis_to_scalar_impl(nir_function_impl *impl)
  * instance, if one of the sources is a non-scalarizable vector, then we
  * don't bother lowering because that would generate hard-to-coalesce movs.
  */
-void
+bool
 nir_lower_phis_to_scalar(nir_shader *shader)
 {
+   bool progress = false;
+
    nir_foreach_function(function, shader) {
       if (function->impl)
-         lower_phis_to_scalar_impl(function->impl);
+         progress = lower_phis_to_scalar_impl(function->impl) || progress;
    }
+
+   return progress;
 }