i965/fs: Enable CSE on uniform pull constant loads.
author Eric Anholt <eric@anholt.net>
Sat, 16 Feb 2013 03:49:32 +0000 (19:49 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 19 Feb 2013 18:34:03 +0000 (10:34 -0800)
Improves on a major performance regression for the Dolphin Wii emulator
caused by its move to using UBOs.  Performance in the UBO codepath (as
replayed through apitrace) is up 21.1% +/- 2.3% (n=26/29).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index a13ca362699a9402fd7d639bbea72a0db6c30fcc..44479d8e9ffef2286450ccb757aee6df246b556f 100644 (file)
@@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst)
    case BRW_OPCODE_LINE:
    case BRW_OPCODE_PLN:
    case BRW_OPCODE_MAD:
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
       return true;
@@ -136,8 +137,10 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
            }
 
            /* dest <- temp */
+            assert(inst->dst.type == entry->tmp.type);
            fs_inst *copy = new(ralloc_parent(inst))
               fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
+            copy->force_writemask_all = inst->force_writemask_all;
            inst->replace_with(copy);
 
            /* Appending an instruction may have changed our bblock end. */