nir: Convert a bcsel with only phi node sources to a phi node
authorIan Romanick <ian.d.romanick@intel.com>
Fri, 18 Jan 2019 01:53:40 +0000 (17:53 -0800)
committerIan Romanick <ian.d.romanick@intel.com>
Fri, 8 Feb 2019 18:37:06 +0000 (10:37 -0800)
v2: Remove the original ALU instruciton after all of its readers are
modified to read the new ALU instruction.

v3: Fix an issue where a bcsel that may not be executed on a loop
iteration due to a break statement is converted to a phi (and therefore
incorrectly "executed").  Noticed by Tim.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109216
Fixes: 8fb8ebfbb05 ("intel/compiler: More peephole select")
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/compiler/nir/nir_opt_if.c

index be0a2d55d78840c9a5642b002b58fd5be7eb6a3c..9afb901be14598f7726e9974855ed8549f91bd22 100644 (file)
@@ -542,6 +542,225 @@ opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
    return progress;
 }
 
+/**
+ * Get the SSA value from a phi node that corresponds to a specific block
+ */
+static nir_ssa_def *
+ssa_for_phi_from_block(nir_phi_instr *phi, nir_block *block)
+{
+   nir_foreach_phi_src(src, phi) {
+      if (src->pred == block)
+         return src->src.ssa;
+   }
+
+   assert(!"Block is not a predecessor of phi.");
+   return NULL;
+}
+
+/**
+ * Simplify a bcsel whose sources are all phi nodes from the loop header block
+ *
+ * bcsel instructions in a loop that meet the following criteria can be
+ * converted to phi nodes:
+ *
+ * - The loop has no continue instructions other than the "natural" continue
+ *   at the bottom of the loop.
+ *
+ * - All of the sources of the bcsel are phi nodes in the header block of the
+ *   loop.
+ *
+ * - The phi node representing the condition of the bcsel instruction chooses
+ *   only constant values.
+ *
+ * The contant value from the condition will select one of the other sources
+ * when entered from outside the loop and the remaining source when entered
+ * from the continue block.  Since each of these sources is also a phi node in
+ * the header block, the value of the phi node can be "evaluated."  These
+ * evaluated phi nodes provide the sources for a new phi node.  All users of
+ * the bcsel result are updated to use the phi node result.
+ *
+ * The replacement transforms loops like:
+ *
+ *    vec1 32 ssa_7 = undefined
+ *    vec1 32 ssa_8 = load_const (0x00000001)
+ *    vec1 32 ssa_9 = load_const (0x000000c8)
+ *    vec1 32 ssa_10 = load_const (0x00000000)
+ *    // succs: block_1
+ *    loop {
+ *            block block_1:
+ *            // preds: block_0 block_4
+ *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
+ *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
+ *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
+ *            vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
+ *            vec1 32 ssa_16 = ige32 ssa_14, ssa_9
+ *            ...
+ *            vec1 32 ssa_15 = load_const (0xffffffff)
+ *            ...
+ *            vec1 32 ssa_25 = iadd ssa_14, ssa_8
+ *            // succs: block_1
+ *    }
+ *
+ * into:
+ *
+ *    vec1 32 ssa_7 = undefined
+ *    vec1 32 ssa_8 = load_const (0x00000001)
+ *    vec1 32 ssa_9 = load_const (0x000000c8)
+ *    vec1 32 ssa_10 = load_const (0x00000000)
+ *    // succs: block_1
+ *    loop {
+ *            block block_1:
+ *            // preds: block_0 block_4
+ *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
+ *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
+ *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
+ *            vec1 32 sss_26 = phi block_0: ssa_1, block_4: ssa_25
+ *            vec1 32 ssa_16 = ige32 ssa_26, ssa_9
+ *            ...
+ *            vec1 32 ssa_15 = load_const (0xffffffff)
+ *            ...
+ *            vec1 32 ssa_25 = iadd ssa_26, ssa_8
+ *            // succs: block_1
+ *    }
+ *
+ * \note
+ * It may be possible modify this function to not require a phi node as the
+ * source of the bcsel that is selected when entering from outside the loop.
+ * The only restriction is that the source must be geneated outside the loop
+ * (since it will become the source of a phi node in the header block of the
+ * loop).
+ */
+static bool
+opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
+{
+   bool progress = false;
+   nir_block *header_block = nir_loop_first_block(loop);
+   nir_block *const prev_block =
+      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
+
+   /* It would be insane if this were not true */
+   assert(_mesa_set_search(header_block->predecessors, prev_block));
+
+   /* The loop must have exactly one continue block which could be a block
+    * ending in a continue instruction or the "natural" continue from the
+    * last block in the loop back to the top.
+    */
+   if (header_block->predecessors->entries != 2)
+      return false;
+
+   /* We can move any bcsel that can guaranteed to execut on every iteration
+    * of a loop.  For now this is accomplished by only taking bcsels from the
+    * header_block.  In the future, this could be expanced to include any
+    * bcsel that must come before any break.
+    *
+    * For more details, see
+    * https://gitlab.freedesktop.org/mesa/mesa/merge_requests/170#note_110305
+    */
+   nir_foreach_instr_safe(instr, header_block) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
+      if (bcsel->op != nir_op_bcsel &&
+          bcsel->op != nir_op_b32csel &&
+          bcsel->op != nir_op_fcsel)
+         continue;
+
+      bool match = true;
+      for (unsigned i = 0; i < 3; i++) {
+         /* FINISHME: The abs and negate cases could be handled by adding
+          * move instructions at the bottom of the continue block and more
+          * phi nodes in the header_block.
+          */
+         if (!bcsel->src[i].src.is_ssa ||
+             bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi ||
+             bcsel->src[i].src.ssa->parent_instr->block != header_block ||
+             bcsel->src[i].negate || bcsel->src[i].abs) {
+            match = false;
+            break;
+         }
+      }
+
+      if (!match)
+         continue;
+
+      nir_phi_instr *const cond_phi =
+         nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);
+
+      uint32_t entry_val = 0, continue_val = 0;
+      if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
+                                                                  prev_block,
+                                                                  &entry_val,
+                                                                  &continue_val))
+         continue;
+
+      /* If they both execute or both don't execute, this is a job for
+       * nir_dead_cf, not this pass.
+       */
+      if ((entry_val && continue_val) || (!entry_val && !continue_val))
+         continue;
+
+      const unsigned entry_src = entry_val ? 1 : 2;
+      const unsigned continue_src = entry_val ? 2 : 1;
+
+      /* Create a new phi node that selects the value for prev_block from
+       * the bcsel source that is selected by entry_val and the value for
+       * continue_block from the other bcsel source.  Both sources have
+       * already been verified to be phi nodes.
+       */
+      nir_block *const continue_block = find_continue_block(loop);
+      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
+      nir_phi_src *phi_src;
+
+      phi_src = ralloc(phi, nir_phi_src);
+      phi_src->pred = prev_block;
+      phi_src->src =
+         nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
+                                                prev_block));
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
+
+      phi_src = ralloc(phi, nir_phi_src);
+      phi_src->pred = continue_block;
+      phi_src->src =
+         nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
+                                                continue_block));
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
+
+      nir_ssa_dest_init(&phi->instr,
+                        &phi->dest,
+                        nir_dest_num_components(bcsel->dest.dest),
+                        nir_dest_bit_size(bcsel->dest.dest),
+                        NULL);
+
+      b->cursor = nir_after_phis(header_block);
+      nir_builder_instr_insert(b, &phi->instr);
+
+      /* Modify all readers of the bcsel instruction to read the result of
+       * the phi.
+       */
+      nir_foreach_use_safe(use_src, &bcsel->dest.dest.ssa) {
+         nir_instr_rewrite_src(use_src->parent_instr,
+                               use_src,
+                               nir_src_for_ssa(&phi->dest.ssa));
+      }
+
+      nir_foreach_if_use_safe(use_src, &bcsel->dest.dest.ssa) {
+         nir_if_rewrite_condition(use_src->parent_if,
+                                  nir_src_for_ssa(&phi->dest.ssa));
+      }
+
+      /* Since the original bcsel instruction no longer has any readers,
+       * just remove it.
+       */
+      nir_instr_remove_v(&bcsel->instr);
+      ralloc_free(bcsel);
+
+      progress = true;
+   }
+
+   return progress;
+}
+
 static bool
 is_block_empty(nir_block *block)
 {
@@ -1109,6 +1328,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
       case nir_cf_node_loop: {
          nir_loop *loop = nir_cf_node_as_loop(cf_node);
          progress |= opt_if_cf_list(b, &loop->body);
+         progress |= opt_simplify_bcsel_of_phi(b, loop);
          progress |= opt_peel_loop_initial_if(loop);
          progress |= opt_if_loop_last_continue(loop);
          break;