i965: Use a separate register for every access to an SSA undef.
author: Kenneth Graunke <kenneth@whitecape.org>
Fri, 29 Jul 2016 08:29:09 +0000 (01:29 -0700)
committer: Kenneth Graunke <kenneth@whitecape.org>
Thu, 4 Aug 2016 07:40:10 +0000 (00:40 -0700)
Previously, we allocated a new VGRF for every definition of an SSA
undef.  Instead, this patch makes us allocate a new VGRF for every
use of an SSA undef.  This makes sure that undefined values are
fully independent of one another, and have live ranges limited to
their single use.  This allows register coalescing to combine the
source and destination of MOVs from undefined sources, eliminating
the MOV altogether.

On Broadwell:

total instructions in shared programs: 11641187 -> 11640214 (-0.01%)
instructions in affected programs: 70199 -> 69226 (-1.39%)
helped: 213
HURT: 1

v2: Add a comment (based on Iago's suggested one).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index fc1e1c4a2c56ea314c36cccf6bb75393c7688abe..8b1ea798d908d0f592ecee84b660ef97a50cb28e 100644 (file)
@@ -204,8 +204,6 @@ public:
    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
    void nir_emit_load_const(const brw::fs_builder &bld,
                             nir_load_const_instr *instr);
-   void nir_emit_undef(const brw::fs_builder &bld,
-                       nir_ssa_undef_instr *instr);
    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
index 5236d0e5fc4feaa32fcfacca15d0afd6a27627a5..c1f413b366a90dc315323e23d00b7165fe1056e0 100644 (file)
@@ -445,7 +445,10 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
       break;
 
    case nir_instr_type_ssa_undef:
-      nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
+      /* We create a new VGRF for undefs on every use (by handling
+       * them in get_nir_src()), rather than for each definition.
+       * This helps register coalescing eliminate MOVs from undef.
+       */
       break;
 
    case nir_instr_type_jump:
@@ -1489,21 +1492,18 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
    nir_ssa_values[instr->def.index] = reg;
 }
 
-void
-fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
-{
-   const brw_reg_type reg_type =
-      instr->def.bit_size == 32 ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
-   nir_ssa_values[instr->def.index] =
-      bld.vgrf(reg_type, instr->def.num_components);
-}
-
 fs_reg
 fs_visitor::get_nir_src(const nir_src &src)
 {
    fs_reg reg;
    if (src.is_ssa) {
-      reg = nir_ssa_values[src.ssa->index];
+      if (src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
+         const brw_reg_type reg_type = src.ssa->bit_size == 32 ?
+            BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
+         reg = bld.vgrf(reg_type, src.ssa->num_components);
+      } else {
+         reg = nir_ssa_values[src.ssa->index];
+      }
    } else {
       /* We don't handle indirects on locals */
       assert(src.reg.indirect == NULL);