i965/fs: Allow copy propagation with source modifiers.
author Eric Anholt <eric@anholt.net>
Wed, 6 Jun 2012 18:06:51 +0000 (11:06 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 3 Jul 2012 19:57:04 +0000 (12:57 -0700)
This shaves a few instructions off of a ton of programs.  For 12
shaders from tropics and sanctuary, it's enough reduction in register
pressure to get 16-wide mode.  7 shaders from heroes of newerth and
savage2 are hurt by about 1.1%, where copy propagation of negates ends
up preventing coalescing, but we could regain that by doing dataflow
analysis in our copy propagation.

No significant performance difference in tropics (n=11)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp

index d510e5b3609dd52f5c781193dc88adbd867813f9..a019cb5814c6194397f0fb32655d1fc00ba99295 100644 (file)
@@ -40,9 +40,25 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       return false;
    }
 
+   /* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
+   if (inst->conditional_mod &&
+       inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
+       entry->src.negate)
+      return false;
+
+   bool has_source_modifiers = entry->src.abs || entry->src.negate;
+
+   if (intel->gen == 6 && inst->is_math() && has_source_modifiers)
+      return false;
+
    inst->src[arg].reg = entry->src.reg;
    inst->src[arg].reg_offset = entry->src.reg_offset;
 
+   if (!inst->src[arg].abs) {
+      inst->src[arg].abs = entry->src.abs;
+      inst->src[arg].negate ^= entry->src.negate;
+   }
+
    return true;
 }
 
@@ -113,9 +129,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
          !inst->predicated &&
          !inst->force_uncompressed &&
          !inst->force_sechalf &&
-         inst->src[0].smear == -1 &&
-         !inst->src[0].abs &&
-         !inst->src[0].negate) {
+         inst->src[0].smear == -1) {
         acp_entry *entry = ralloc(mem_ctx, acp_entry);
         entry->dst = inst->dst;
         entry->src = inst->src[0];