From: Eric Anholt Date: Wed, 6 Jun 2012 18:06:51 +0000 (-0700) Subject: i965/fs: Allow copy propagation with source modifiers. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0c4630bae001139dea42b78cd08157de4d90542b;p=mesa.git i965/fs: Allow copy propagation with source modifiers. This shaves a few instructions off of a ton of programs. For 12 shaders from Tropics and Sanctuary, it's enough reduction in register pressure to get 16-wide mode. 7 shaders from Heroes of Newerth and Savage 2 are hurt by about 1.1%, where copy propagation of negates ends up preventing coalescing, but we could regain that by doing dataflow analysis in our copy propagation. No significant performance difference in Tropics (n=11). Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index d510e5b3609..a019cb5814c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -40,9 +40,25 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) return false; } + /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. 
*/ + if (inst->conditional_mod && + inst->src[arg].type == BRW_REGISTER_TYPE_UD && + entry->src.negate) + return false; + + bool has_source_modifiers = entry->src.abs || entry->src.negate; + + if (intel->gen == 6 && inst->is_math() && has_source_modifiers) + return false; + inst->src[arg].reg = entry->src.reg; inst->src[arg].reg_offset = entry->src.reg_offset; + if (!inst->src[arg].abs) { + inst->src[arg].abs = entry->src.abs; + inst->src[arg].negate ^= entry->src.negate; + } + return true; } @@ -113,9 +129,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, !inst->predicated && !inst->force_uncompressed && !inst->force_sechalf && - inst->src[0].smear == -1 && - !inst->src[0].abs && - !inst->src[0].negate) { + inst->src[0].smear == -1) { acp_entry *entry = ralloc(mem_ctx, acp_entry); entry->dst = inst->dst; entry->src = inst->src[0];