i965: Perform basic optimizations on the BROADCAST opcode.
author: Francisco Jerez <currojerez@riseup.net>
Thu, 19 Feb 2015 12:52:24 +0000 (14:52 +0200)
committer: Francisco Jerez <currojerez@riseup.net>
Mon, 4 May 2015 14:44:17 +0000 (17:44 +0300)
v2: Style fixes.

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_ir_fs.h
src/mesa/drivers/dri/i965/brw_ir_vec4.h
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp

index f5e8dfa42e1ee626a4074cce63380612e24ee4c9..182c79fc83cf5de557f7715fb9734f11efdcefa4 100644 (file)
@@ -2546,6 +2546,22 @@ fs_visitor::opt_algebraic()
          }
          break;
       }
+      case SHADER_OPCODE_BROADCAST:
+         if (is_uniform(inst->src[0])) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->force_writemask_all = true;
+            progress = true;
+         } else if (inst->src[1].file == IMM) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = component(inst->src[0],
+                                     inst->src[1].fixed_hw_reg.dw1.ud);
+            inst->sources = 1;
+            inst->force_writemask_all = true;
+            progress = true;
+         }
+         break;
+
       default:
         break;
       }
index c9ce2bd4513647b12db6cf0ecd1d90dd309d9ad2..d926c1d0f2186e97e0b4b2d240e5ccb31d5e9d35 100644 (file)
@@ -608,6 +608,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          break;
 
       case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+      case SHADER_OPCODE_BROADCAST:
          inst->src[i] = val;
          progress = true;
          break;
index c1d06161cbf1311a0a006f76e8e2b5fb43d93dd7..8958e62180273ce6637a4a9c54d7c4a390051b03 100644 (file)
@@ -89,6 +89,7 @@ is_expression(const fs_inst *const inst)
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
+   case SHADER_OPCODE_BROADCAST:
       return true;
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
index 1e54bb23301d712249d82d9d83733dddae844312..0727ac5109e5c9bbcb9fffe02aff11ca51695d12 100644 (file)
@@ -166,6 +166,13 @@ component(fs_reg reg, unsigned idx)
    return reg;
 }
 
+static inline bool /* Whether reg counts as uniform; recurses into reg.reladdr. */
+is_uniform(const fs_reg &reg)
+{
+   return (reg.width == 1 || reg.stride == 0 || reg.is_null()) && /* reg itself */
+          (!reg.reladdr || is_uniform(*reg.reladdr)); /* and its reladdr, if set */
+}
+
 /**
  * Get either of the 8-component halves of a 16-component register.
  *
index 955a62d5f9ef804f6220c6bd8d7dbc9af448c9a0..a56fdd6fce91fe40eac242612b9c6b328d252f36 100644 (file)
@@ -95,6 +95,13 @@ negate(src_reg reg)
    return reg;
 }
 
+static inline bool /* Whether reg counts as uniform; recurses into reg.reladdr. */
+is_uniform(const src_reg &reg)
+{
+   return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) && /* by register file */
+          (!reg.reladdr || is_uniform(*reg.reladdr)); /* and its reladdr, if set */
+}
+
 class dst_reg : public backend_reg
 {
 public:
index 4f755538d33733d21cfe919e01222e1f13b08eb0..607129bc63f331b70d0413819399e15fc7c9b38f 100644 (file)
@@ -682,6 +682,16 @@ vec4_visitor::opt_algebraic()
          }
          break;
       }
+      case SHADER_OPCODE_BROADCAST:
+         if (is_uniform(inst->src[0]) ||
+             inst->src[1].is_zero()) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            inst->force_writemask_all = true;
+            progress = true;
+         }
+         break;
+
       default:
         break;
       }
index 0fbf0ab50a4b1d6947820fa4350fb3dba31d4ffc..2d9afa8145fcefffc5181237107e304d60e9ddc7 100644 (file)
@@ -152,6 +152,7 @@ try_constant_propagate(const struct brw_device_info *devinfo,
 
    switch (inst->opcode) {
    case BRW_OPCODE_MOV:
+   case SHADER_OPCODE_BROADCAST:
       inst->src[arg] = value;
       return true;
 
index 100e511a56c84b02b47e4bfa5a41ab9117dee3c0..66b531c2909519b5fcc2e8047fea7a121c086df7 100644 (file)
@@ -72,6 +72,7 @@ is_expression(const vec4_instruction *const inst)
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
    case VEC4_OPCODE_UNPACK_UNIFORM:
+   case SHADER_OPCODE_BROADCAST:
       return true;
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ: