i965/fs: Do CSE on gen7's varying-index pull constant loads.
author: Eric Anholt <eric@anholt.net>
Fri, 15 Mar 2013 21:43:28 +0000 (14:43 -0700)
committer: Eric Anholt <eric@anholt.net>
Mon, 1 Apr 2013 23:17:25 +0000 (16:17 -0700)
This is our first CSE on a regs_written() > 1 instruction, so it takes a
bit of extra fixup.  Reduces the number of loads on kwin's Lanczos shader
from 12 to 2.

v2: Fix compiler warning (false positive on possibly-uninitialized variable)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
NOTE: This is a candidate for the 9.1 branch.

src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index 02642c91a617d6cd7040df97a0b12842d07aafbc..5a50d45ddc981ed847c25a5ad55b61d98507ba81 100644 (file)
@@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst)
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
       return true;
@@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             */
            bool no_existing_temp = entry->tmp.file == BAD_FILE;
            if (no_existing_temp) {
-              entry->tmp = fs_reg(this, glsl_type::float_type);
-              entry->tmp.type = inst->dst.type;
-
-              fs_inst *copy = new(ralloc_parent(inst))
-                 fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
-              entry->generator->insert_after(copy);
-              entry->generator->dst = entry->tmp;
+               int written = entry->generator->regs_written();
+
+               fs_reg orig_dst = entry->generator->dst;
+               fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+                                   orig_dst.type);
+               entry->tmp = tmp;
+               entry->generator->dst = tmp;
+
+               for (int i = 0; i < written; i++) {
+                  fs_inst *copy = MOV(orig_dst, tmp);
+                  copy->force_writemask_all =
+                     entry->generator->force_writemask_all;
+                  entry->generator->insert_after(copy);
+
+                  orig_dst.reg_offset++;
+                  tmp.reg_offset++;
+               }
            }
 
            /* dest <- temp */
+            int written = inst->regs_written();
+            assert(written == entry->generator->regs_written());
             assert(inst->dst.type == entry->tmp.type);
-           fs_inst *copy = new(ralloc_parent(inst))
-              fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
-            copy->force_writemask_all = inst->force_writemask_all;
-           inst->replace_with(copy);
+            fs_reg dst = inst->dst;
+            fs_reg tmp = entry->tmp;
+            fs_inst *copy = NULL;
+            for (int i = 0; i < written; i++) {
+               copy = MOV(dst, tmp);
+               copy->force_writemask_all = inst->force_writemask_all;
+               inst->insert_before(copy);
+
+               dst.reg_offset++;
+               tmp.reg_offset++;
+            }
+            inst->remove();
 
            /* Appending an instruction may have changed our bblock end. */
            if (inst == block->end) {