nir: Make nir_search's dumping go to stderr.
[mesa.git] / src / compiler / nir / nir_opt_loop_unroll.c
index a5a5ca3deeec5583048fdac1be3228b57270af02..0681518d9606bc9544f16561b394ac97a8634c13 100644 (file)
@@ -42,7 +42,7 @@
  * to keep track of and update phis along the way which gets tricky and
  * doesn't add much value over converting to regs.
  *
- * The loop may have a continue instruction at the end of the loop which does
+ * The loop may have a jump instruction at the end of the loop which does
  * nothing.  Once we're out of SSA, we can safely delete it so we don't have
  * to deal with it later.
  */
@@ -67,7 +67,7 @@ loop_prepare_for_unroll(nir_loop *loop)
 
    nir_lower_phis_to_regs_block(block_after_loop);
 
-   /* Remove continue if its the last instruction in the loop */
+   /* Remove jump if it's the last instruction in the loop */
    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
    if (last_instr && last_instr->type == nir_instr_type_jump) {
       nir_instr_remove(last_instr);
@@ -456,6 +456,55 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
    _mesa_hash_table_destroy(remap_table, NULL);
 }
 
+/**
+ * Unroll a loop that has exactly one terminator (the limiting one) whose
+ * exact trip count is unknown; max_trip_count bounds the clones. Example:
+ *
+ *    for (int i = 0; i < imin(x, 4); i++)
+ *       ...
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+   assert(list_length(&loop->info->loop_terminator_list) == 1);
+   assert(loop->info->limiting_terminator);
+   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+                                 loop->info->limiting_terminator->break_block));
+
+   nir_loop_terminator *terminator = loop->info->limiting_terminator;
+
+   loop_prepare_for_unroll(loop);
+
+   /* Pluck out the loop header (everything before the terminator's if) */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&terminator->nif->cf_node));
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   /* We need to clone the loop one extra time in order to clone the lcssa
+    * vars for the last iteration (they are inside the following if's break
+    * branch). We leave it to other passes to clean up this redundant if.
+    */
+   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+   nir_cf_list lp_body;
+   UNUSED nir_cf_node *unroll_loc =
+      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+                               remap_table, num_times_to_clone);
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&lp_body);
+
+   /* The original loop has been replaced, so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
 /* Unrolls the classic wrapper loops e.g
  *
  *    do {
@@ -511,31 +560,7 @@ wrapper_unroll(nir_loop *loop)
            nir_after_block(nir_if_last_else_block(terminator->nif));
       }
    } else {
-      nir_block *blk_after_loop =
-         nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
-
-      /* There may still be some single src phis following the loop that
-       * have not yet been cleaned up by another pass. Tidy those up
-       * before unrolling the loop.
-       */
-      nir_foreach_instr_safe(instr, blk_after_loop) {
-         if (instr->type != nir_instr_type_phi)
-            break;
-
-         nir_phi_instr *phi = nir_instr_as_phi(instr);
-         assert(exec_list_length(&phi->srcs) == 1);
-
-         nir_phi_src *phi_src =
-            exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node);
-
-         nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
-         nir_instr_remove(instr);
-      }
-
-      /* Remove break at end of the loop */
-      nir_block *last_loop_blk = nir_loop_last_block(loop);
-      nir_instr *break_instr = nir_block_last_instr(last_loop_blk);
-      nir_instr_remove(break_instr);
+      loop_prepare_for_unroll(loop);
    }
 
    /* Pluck out the loop body. */
@@ -621,11 +646,9 @@ remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
             if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
                                         trip_count)) {
                if (intrin->intrinsic == nir_intrinsic_load_deref) {
-                  assert(intrin->src[0].is_ssa);
-                  nir_ssa_def *a_ssa = intrin->src[0].ssa;
                   nir_ssa_def *undef =
-                     nir_ssa_undef(&b, intrin->num_components,
-                                   a_ssa->bit_size);
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                   intrin->dest.ssa.bit_size);
                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                            nir_src_for_ssa(undef));
                } else {
@@ -637,14 +660,6 @@ remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
             if (intrin->intrinsic == nir_intrinsic_copy_deref &&
                 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
                                         trip_count)) {
-               assert(intrin->src[1].is_ssa);
-               nir_ssa_def *a_ssa = intrin->src[1].ssa;
-               nir_ssa_def *undef =
-                  nir_ssa_undef(&b, intrin->num_components, a_ssa->bit_size);
-
-               /* Replace the copy with a store of the undefined value */
-               b.cursor = nir_before_instr(instr);
-               nir_store_deref(&b, nir_src_as_deref(intrin->src[0]), undef, ~0);
                nir_instr_remove(instr);
             }
          }
@@ -731,11 +746,20 @@ partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
    _mesa_hash_table_destroy(remap_table, NULL);
 }
 
+/*
+ * Returns true if we should unroll the loop, otherwise false.
+ */
 static bool
-is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
 {
-   unsigned max_iter = shader->options->max_unroll_iterations;
+   if (loop->control == nir_loop_control_unroll)
+      return true;
 
+   if (loop->control == nir_loop_control_dont_unroll)
+      return false;
+
+   nir_loop_info *li = loop->info;
+   unsigned max_iter = shader->options->max_unroll_iterations;
    unsigned trip_count =
       li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
 
@@ -783,7 +807,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
    /* Don't attempt to unroll a second inner loop in this pass, wait until the
     * next pass as we have altered the cf.
     */
-   if (!progress) {
+   if (!progress && loop->control != nir_loop_control_dont_unroll) {
 
       /* Check for the classic
        *
@@ -809,7 +833,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
          if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
              loop->info->guessed_trip_count &&
-             is_loop_small_enough_to_unroll(sh, loop->info)) {
+             check_unrolling_restrictions(sh, loop)) {
             partial_unroll(sh, loop, loop->info->guessed_trip_count);
             progress = true;
          }
@@ -818,7 +842,7 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
       if (has_nested_loop || !loop->info->limiting_terminator)
          goto exit;
 
-      if (!is_loop_small_enough_to_unroll(sh, loop->info))
+      if (!check_unrolling_restrictions(sh, loop))
          goto exit;
 
       if (loop->info->exact_trip_count_known) {
@@ -853,6 +877,12 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
             }
             progress = true;
          }
+
+         if (num_lt == 1) {
+            assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+            complex_unroll_single_terminator(loop);
+            progress = true;
+         }
       }
    }