From 9f43b8492818bab47ef9cc489b91c2618446a3e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 15 Mar 2013 14:43:28 -0700 Subject: [PATCH] i965/fs: Do CSE on gen7's varying-index pull constant loads. This is our first CSE on a regs_written() > 1 instruction, so it takes a bit of extra fixup. Reduces the number of loads on kwin's Lanczos shader from 12 to 2. v2: Fix compiler warning (false positive on possibly-uninitialized variable) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554 Reviewed-by: Kenneth Graunke (v1) NOTE: This is a candidate for the 9.1 branch. --- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 43 ++++++++++++++++++------ 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 02642c91a61..5a50d45ddc9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: return true; @@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp) { - entry->tmp = fs_reg(this, glsl_type::float_type); - entry->tmp.type = inst->dst.type; - - fs_inst *copy = new(ralloc_parent(inst)) - fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp); - entry->generator->insert_after(copy); - entry->generator->dst = entry->tmp; + int written = entry->generator->regs_written(); + + fs_reg orig_dst = entry->generator->dst; + fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), + orig_dst.type); + entry->tmp = tmp; + entry->generator->dst = tmp; + + for (int i = 0; i < written; i++) { + fs_inst *copy = MOV(orig_dst, tmp); + copy->force_writemask_all = + entry->generator->force_writemask_all; + entry->generator->insert_after(copy); + + orig_dst.reg_offset++; + tmp.reg_offset++; + } } /* dest <- temp */ + int written = inst->regs_written(); + assert(written == entry->generator->regs_written()); assert(inst->dst.type == entry->tmp.type); - fs_inst *copy = new(ralloc_parent(inst)) - fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp); - copy->force_writemask_all = inst->force_writemask_all; - inst->replace_with(copy); + fs_reg dst = inst->dst; + fs_reg tmp = entry->tmp; + fs_inst *copy = NULL; + for (int i = 0; i < written; i++) { + copy = MOV(dst, tmp); + copy->force_writemask_all = inst->force_writemask_all; + inst->insert_before(copy); + + dst.reg_offset++; + tmp.reg_offset++; + } + inst->remove(); /* Appending an instruction may have changed our bblock end. */ if (inst == block->end) { -- 2.30.2