i965/fs: Move constant propagation to the same codebase as copy prop.
author Eric Anholt <eric@anholt.net>
Fri, 21 Sep 2012 11:11:54 +0000 (13:11 +0200)
committer Eric Anholt <eric@anholt.net>
Mon, 8 Oct 2012 15:50:38 +0000 (08:50 -0700)
This means that we no longer get constant propagation into the first block
after a BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope of properly doing
it across control flow at some point.  More importantly, with the next commit
it will help avoid runtime that is O(n^2) in instruction count for shaders
that have many constant moves.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp

index 27014133d4f35968e3f83132beadf203af2f37a7..0545a74ec3d2ae7dc743bd883149b7eda58216d3 100644 (file)
@@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
    c->prog_data.nr_pull_params = pull_uniform_count;
 }
 
-/**
- * Attempts to move immediate constants into the immediate
- * constant slot of following instructions.
- *
- * Immediate constants are a bit tricky -- they have to be in the last
- * operand slot, you can't do abs/negate on them,
- */
-
-bool
-fs_visitor::propagate_constants()
-{
-   bool progress = false;
-
-   calculate_live_intervals();
-
-   foreach_list(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode != BRW_OPCODE_MOV ||
-         inst->predicated ||
-         inst->dst.file != GRF || inst->src[0].file != IMM ||
-         inst->dst.type != inst->src[0].type ||
-         (c->dispatch_width == 16 &&
-          (inst->force_uncompressed || inst->force_sechalf)))
-        continue;
-
-      /* Don't bother with cases where we should have had the
-       * operation on the constant folded in GLSL already.
-       */
-      if (inst->saturate)
-        continue;
-
-      /* Found a move of a constant to a GRF.  Find anything else using the GRF
-       * before it's written, and replace it with the constant if we can.
-       */
-      for (fs_inst *scan_inst = (fs_inst *)inst->next;
-          !scan_inst->is_tail_sentinel();
-          scan_inst = (fs_inst *)scan_inst->next) {
-        if (scan_inst->opcode == BRW_OPCODE_DO ||
-            scan_inst->opcode == BRW_OPCODE_WHILE ||
-            scan_inst->opcode == BRW_OPCODE_ELSE ||
-            scan_inst->opcode == BRW_OPCODE_ENDIF) {
-           break;
-        }
-
-        for (int i = 2; i >= 0; i--) {
-           if (scan_inst->src[i].file != GRF ||
-               scan_inst->src[i].reg != inst->dst.reg ||
-               scan_inst->src[i].reg_offset != inst->dst.reg_offset)
-              continue;
-
-           /* Don't bother with cases where we should have had the
-            * operation on the constant folded in GLSL already.
-            */
-           if (scan_inst->src[i].negate || scan_inst->src[i].abs)
-              continue;
-
-           switch (scan_inst->opcode) {
-           case BRW_OPCODE_MOV:
-              scan_inst->src[i] = inst->src[0];
-              progress = true;
-              break;
-
-           case BRW_OPCODE_MUL:
-           case BRW_OPCODE_ADD:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 /* Fit this constant in by commuting the operands.
-                  * Exception: we can't do this for 32-bit integer MUL
-                  * because it's asymmetric.
-                  */
-                 if (scan_inst->opcode == BRW_OPCODE_MUL &&
-                     (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
-                      scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
-                    break;
-                 scan_inst->src[0] = scan_inst->src[1];
-                 scan_inst->src[1] = inst->src[0];
-                 progress = true;
-              }
-              break;
-
-           case BRW_OPCODE_CMP:
-           case BRW_OPCODE_IF:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 uint32_t new_cmod;
-
-                 new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
-                 if (new_cmod != ~0u) {
-                    /* Fit this constant in by swapping the operands and
-                     * flipping the test
-                     */
-                    scan_inst->src[0] = scan_inst->src[1];
-                    scan_inst->src[1] = inst->src[0];
-                    scan_inst->conditional_mod = new_cmod;
-                    progress = true;
-                 }
-              }
-              break;
-
-           case BRW_OPCODE_SEL:
-              if (i == 1) {
-                 scan_inst->src[i] = inst->src[0];
-                 progress = true;
-              } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 scan_inst->src[0] = scan_inst->src[1];
-                 scan_inst->src[1] = inst->src[0];
-
-                 /* If this was predicated, flipping operands means
-                  * we also need to flip the predicate.
-                  */
-                 if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
-                    scan_inst->predicate_inverse =
-                       !scan_inst->predicate_inverse;
-                 }
-                 progress = true;
-              }
-              break;
-
-           case SHADER_OPCODE_RCP:
-              /* The hardware doesn't do math on immediate values
-               * (because why are you doing that, seriously?), but
-               * the correct answer is to just constant fold it
-               * anyway.
-               */
-              assert(i == 0);
-              if (inst->src[0].imm.f != 0.0f) {
-                 scan_inst->opcode = BRW_OPCODE_MOV;
-                 scan_inst->src[0] = inst->src[0];
-                 scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
-                 progress = true;
-              }
-              break;
-
-            case FS_OPCODE_PULL_CONSTANT_LOAD:
-              scan_inst->src[i] = inst->src[0];
-              progress = true;
-              break;
-
-           default:
-              break;
-           }
-        }
-
-        if (scan_inst->dst.file == GRF &&
-             scan_inst->overwrites_reg(inst->dst)) {
-           break;
-        }
-      }
-   }
-
-   if (progress)
-       this->live_intervals_valid = false;
-
-   return progress;
-}
-
-
 bool
 fs_visitor::opt_algebraic()
 {
@@ -2025,7 +1863,6 @@ fs_visitor::run()
 
         progress = remove_duplicate_mrf_writes() || progress;
 
-        progress = propagate_constants() || progress;
         progress = opt_algebraic() || progress;
         progress = opt_cse() || progress;
         progress = opt_copy_propagate() || progress;
index 4db9e90ed5e3ba6248c137c58353464a73afd669..269dd0aceb9cc2a2c4298642a0c24d824fb7c27d 100644 (file)
@@ -240,12 +240,12 @@ public:
    void split_virtual_grfs();
    void setup_pull_constants();
    void calculate_live_intervals();
-   bool propagate_constants();
    bool opt_algebraic();
    bool opt_cse();
    bool opt_cse_local(fs_bblock *block, exec_list *aeb);
    bool opt_copy_propagate();
    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
+   bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
                                 exec_list *acp);
    bool register_coalesce();
index 1870f43feba498fad75effa5c919ecf02298c750..6eff80285d780f9be5c193106ac48c98f58d19b9 100644 (file)
@@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
 bool
 fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
 {
+   if (entry->src.file == IMM)
+      return false;
+
    if (inst->src[arg].file != entry->dst.file ||
        inst->src[arg].reg != entry->dst.reg ||
        inst->src[arg].reg_offset != entry->dst.reg_offset) {
@@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    return true;
 }
 
+
+/**
+ * Tries to replace uses of entry->dst in inst with the immediate entry->src.
+ *
+ * Does nothing unless entry->src is an IMM.  Operands of ADD/MUL/CMP/IF/SEL
+ * are swapped when needed to get the immediate into src[1].
+ *
+ * \return true if inst was modified.
+ */
+bool
+fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
+{
+   bool progress = false;
+
+   if (entry->src.file != IMM)
+      return false;
+
+   for (int i = 2; i >= 0; i--) {
+      if (inst->src[i].file != entry->dst.file ||
+          inst->src[i].reg != entry->dst.reg ||
+          inst->src[i].reg_offset != entry->dst.reg_offset)
+         continue;
+
+      /* Negated/abs constants should already have been folded by GLSL. */
+      if (inst->src[i].negate || inst->src[i].abs)
+         continue;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      case BRW_OPCODE_MUL:
+      case BRW_OPCODE_ADD:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            /* Fit this constant in by commuting the operands.
+             * Exception: we can't do this for 32-bit integer MUL
+             * because it's asymmetric.
+             */
+            if (inst->opcode == BRW_OPCODE_MUL &&
+                (inst->src[1].type == BRW_REGISTER_TYPE_D ||
+                 inst->src[1].type == BRW_REGISTER_TYPE_UD))
+               break;
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_CMP:
+      case BRW_OPCODE_IF:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            uint32_t new_cmod = brw_swap_cmod(inst->conditional_mod);
+            if (new_cmod != ~0u) {
+               /* Swap the operands and flip the test to fit the constant. */
+               inst->src[0] = inst->src[1];
+               inst->src[1] = entry->src;
+               inst->conditional_mod = new_cmod;
+               progress = true;
+            }
+         }
+         break;
+
+      case BRW_OPCODE_SEL:
+         if (i == 1) {
+            inst->src[i] = entry->src;
+            progress = true;
+         } else if (i == 0 && inst->src[1].file != IMM) {
+            inst->src[0] = inst->src[1];
+            inst->src[1] = entry->src;
+
+            /* If this was predicated, flipping operands means
+             * we also need to flip the predicate.
+             */
+            if (inst->conditional_mod == BRW_CONDITIONAL_NONE)
+               inst->predicate_inverse = !inst->predicate_inverse;
+            progress = true;
+         }
+         break;
+
+      case SHADER_OPCODE_RCP:
+         /* The hardware doesn't do math on immediate values, so constant
+          * fold the reciprocal instead.  The zero check must look at the
+          * propagated constant (entry->src): inst->src[0] is still the
+          * register operand here and its imm field is meaningless.
+          */
+         assert(i == 0);
+         if (entry->src.imm.f != 0.0f) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[0] = entry->src;
+            inst->src[0].imm.f = 1.0f / entry->src.imm.f;
+            progress = true;
+         }
+         break;
+
+      case FS_OPCODE_PULL_CONSTANT_LOAD:
+         inst->src[i] = entry->src;
+         progress = true;
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}
+
 /** @file brw_fs_copy_propagation.cpp
  *
  * Support for local copy propagation by walking the list of instructions
@@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
       foreach_list(entry_node, acp) {
         acp_entry *entry = (acp_entry *)entry_node;
 
+         if (try_constant_propagate(inst, entry))
+            progress = true;
+
         for (int i = 0; i < 3; i++) {
            if (try_copy_propagate(inst, i, entry))
               progress = true;
@@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
          ((inst->src[0].file == GRF &&
            (inst->src[0].reg != inst->dst.reg ||
             inst->src[0].reg_offset != inst->dst.reg_offset)) ||
-          inst->src[0].file == UNIFORM) &&
+           inst->src[0].file == UNIFORM ||
+           inst->src[0].file == IMM) &&
          inst->src[0].type == inst->dst.type &&
          !inst->saturate &&
          !inst->predicated &&