i965: Perform basic optimizations on the FIND_LIVE_CHANNEL opcode.
author Francisco Jerez <currojerez@riseup.net>
Fri, 20 Feb 2015 18:25:04 +0000 (20:25 +0200)
committer Francisco Jerez <currojerez@riseup.net>
Mon, 4 May 2015 14:44:17 +0000 (17:44 +0300)
v2: Save some CPU cycles by doing 'return progress' rather than
    'depth++' in the discard jump special case.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp

index 182c79fc83cf5de557f7715fb9734f11efdcefa4..1ca7ca6caed9109cab90a635ed73247af148ad64 100644 (file)
@@ -2979,6 +2979,53 @@ fs_visitor::compute_to_mrf()
    return progress;
 }
 
+/**
+ * Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
+ * flow by rewriting them as a MOV of constant 0; returns true on any rewrite.
+ * We could probably do better here with some form of divergence analysis.
+ */
+bool
+fs_visitor::eliminate_find_live_channel()
+{
+   bool progress = false;   /* set once at least one instruction is rewritten */
+   unsigned depth = 0;      /* IF/DO nesting level at the current instruction */
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_DO:
+         depth++;   /* entering a nested control flow construct */
+         break;
+
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_WHILE:
+         depth--;   /* leaving the construct opened by the matching IF/DO */
+         break;
+
+      case FS_OPCODE_DISCARD_JUMP:
+         /* This can potentially make control flow non-uniform until the end
+          * of the program, so stop here and leave later instructions alone.
+          */
+         return progress;
+
+      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+         if (depth == 0) {   /* not nested inside any IF/DO seen so far */
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = fs_reg(0);
+            inst->sources = 1;   /* the MOV reads only src[0] */
+            inst->force_writemask_all = true;
+            progress = true;
+         }
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Once we've generated code, try to convert normal FS_OPCODE_FB_WRITE
  * instructions to FS_OPCODE_REP_FB_WRITE.
@@ -3920,6 +3967,7 @@ fs_visitor::optimize()
       OPT(opt_zero_samples);
       OPT(register_coalesce);
       OPT(compute_to_mrf);
+      OPT(eliminate_find_live_channel);
 
       OPT(compact_virtual_grfs);
    } while (progress);
index 0783754cba739976b3e97a5248cf54a65f808980..5f4ff15ba4d1da6fbf4c75e1b8ceaaad91d8f264 100644 (file)
@@ -239,6 +239,7 @@ public:
    bool opt_register_renaming();
    bool register_coalesce();
    bool compute_to_mrf();
+   bool eliminate_find_live_channel();
    bool dead_code_eliminate();
    bool remove_duplicate_mrf_writes();
 
index 8958e62180273ce6637a4a9c54d7c4a390051b03..43370cb5c2e41746016bdc7a190209b892b69e6c 100644 (file)
@@ -89,6 +89,7 @@ is_expression(const fs_inst *const inst)
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
    case FS_OPCODE_CINTERP:
    case FS_OPCODE_LINTERP:
+   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
    case SHADER_OPCODE_BROADCAST:
       return true;
    case SHADER_OPCODE_RCP:
index 607129bc63f331b70d0413819399e15fc7c9b38f..a8d0e4acfc9d4742ea98e6a9370388229ee31b66 100644 (file)
@@ -1131,6 +1131,46 @@ vec4_visitor::opt_register_coalesce()
    return progress;
 }
 
+/**
+ * Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
+ * flow by rewriting them as a MOV of constant 0; returns true on any rewrite.
+ * We could probably do better here with some form of divergence analysis.
+ */
+bool
+vec4_visitor::eliminate_find_live_channel()
+{
+   bool progress = false;   /* set once at least one instruction is rewritten */
+   unsigned depth = 0;      /* IF/DO nesting level at the current instruction */
+
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_DO:
+         depth++;   /* entering a nested control flow construct */
+         break;
+
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_WHILE:
+         depth--;   /* leaving the construct opened by the matching IF/DO */
+         break;
+
+      case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+         if (depth == 0) {   /* not nested inside any IF/DO seen so far */
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = src_reg(0);   /* the only source operand changed */
+            inst->force_writemask_all = true;
+            progress = true;
+         }
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Splits virtual GRFs requesting more than one contiguous physical register.
  *
@@ -1759,6 +1799,7 @@ vec4_visitor::run()
       OPT(opt_cse);
       OPT(opt_algebraic);
       OPT(opt_register_coalesce);
+      OPT(eliminate_find_live_channel);
    } while (progress);
 
    pass_num = 0;
index 1f1f100788eae7f9594b5f28bd1493ca3a60fd6d..3f56c5cf03fc8ed9b2ab33de31258fad384bc3f9 100644 (file)
@@ -209,6 +209,7 @@ public:
    bool opt_cse();
    bool opt_algebraic();
    bool opt_register_coalesce();
+   bool eliminate_find_live_channel();
    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
    void opt_set_dependency_control();
    void opt_schedule_instructions();
index 66b531c2909519b5fcc2e8047fea7a121c086df7..9147c3cbb793e0a89d5d2909449b57b39df88944 100644 (file)
@@ -72,6 +72,7 @@ is_expression(const vec4_instruction *const inst)
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_LRP:
    case VEC4_OPCODE_UNPACK_UNIFORM:
+   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
    case SHADER_OPCODE_BROADCAST:
       return true;
    case SHADER_OPCODE_RCP: