i965/vec4: Allow constant propagation of VF immediates.
author: Matt Turner <mattst88@gmail.com>
Sun, 21 Dec 2014 01:42:52 +0000 (17:42 -0800)
committer: Matt Turner <mattst88@gmail.com>
Mon, 29 Dec 2014 18:08:18 +0000 (10:08 -0800)
total instructions in shared programs: 5877951 -> 5877012 (-0.02%)
instructions in affected programs:     155923 -> 154984 (-0.60%)

Helps 1233, hurts 156 shaders. The hurt shaders are addressed in the
next commit.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp

index 9deaffa9fc3b5e7c1940dabca44495920b4da128..9e47dd939902df38bb6a26f862b72df1393305c5 100644 (file)
@@ -50,7 +50,9 @@ is_direct_copy(vec4_instruction *inst)
           inst->dst.file == GRF &&
           !inst->dst.reladdr &&
           !inst->src[0].reladdr &&
-          inst->dst.type == inst->src[0].type);
+          (inst->dst.type == inst->src[0].type ||
+            (inst->dst.type == BRW_REGISTER_TYPE_F &&
+             inst->src[0].type == BRW_REGISTER_TYPE_VF)));
 }
 
 static bool
@@ -77,6 +79,22 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
           inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
 }
 
+static unsigned
+swizzle_vf_imm(unsigned vf4, unsigned swizzle)
+{  /* Reorder the four bytes packed in vf4 and return the repacked word:
+    * for i = 0..3, byte vf[i] of the result is the byte of vf4 picked by
+    * BRW_GET_SWZ(swizzle, i).  The same input byte may be picked more than
+    * once.  BRW_GET_SWZ is defined outside this hunk -- presumably it
+    * yields an index in 0..3; confirm against its definition.
+    */
+   union {
+      unsigned vf4;    /* the whole immediate viewed as one word */
+      uint8_t vf[4];   /* the same storage viewed one byte at a time;
+                        * assumes unsigned is 4 bytes wide -- TODO confirm */
+   } v = { vf4 }, ret; /* v is initialised from the argument; ret is filled
+                        * in byte by byte below */
+
+   ret.vf[0] = v.vf[BRW_GET_SWZ(swizzle, 0)];
+   ret.vf[1] = v.vf[BRW_GET_SWZ(swizzle, 1)];
+   ret.vf[2] = v.vf[BRW_GET_SWZ(swizzle, 2)];
+   ret.vf[3] = v.vf[BRW_GET_SWZ(swizzle, 3)];
+
+   return ret.vf4;     /* read the four reordered bytes back as one word */
+}
+
 static bool
 try_constant_propagate(struct brw_context *brw, vec4_instruction *inst,
                        int arg, struct copy_entry *entry)
@@ -98,6 +116,8 @@ try_constant_propagate(struct brw_context *brw, vec4_instruction *inst,
    if (inst->src[arg].abs) {
       if (value.type == BRW_REGISTER_TYPE_F) {
         value.fixed_hw_reg.dw1.f = fabs(value.fixed_hw_reg.dw1.f);
+      } else if (value.type == BRW_REGISTER_TYPE_VF) {
+         value.fixed_hw_reg.dw1.ud &= ~0x80808080;
       } else if (value.type == BRW_REGISTER_TYPE_D) {
         if (value.fixed_hw_reg.dw1.d < 0)
            value.fixed_hw_reg.dw1.d = -value.fixed_hw_reg.dw1.d;
@@ -107,10 +127,16 @@ try_constant_propagate(struct brw_context *brw, vec4_instruction *inst,
    if (inst->src[arg].negate) {
       if (value.type == BRW_REGISTER_TYPE_F)
         value.fixed_hw_reg.dw1.f = -value.fixed_hw_reg.dw1.f;
+      else if (value.type == BRW_REGISTER_TYPE_VF)
+         value.fixed_hw_reg.dw1.ud ^= 0x80808080;
       else
         value.fixed_hw_reg.dw1.ud = -value.fixed_hw_reg.dw1.ud;
    }
 
+   if (value.type == BRW_REGISTER_TYPE_VF)
+      value.fixed_hw_reg.dw1.ud = swizzle_vf_imm(value.fixed_hw_reg.dw1.ud,
+                                                 inst->src[arg].swizzle);
+
    switch (inst->opcode) {
    case BRW_OPCODE_MOV:
       inst->src[arg] = value;