nir/lower_ssbo: Don't set align_* for atomics
[mesa.git] / src / compiler / nir / nir_opt_loop_unroll.c
index 955dfede6947457a336cafa11769fe76504dd856..c5e7b8c9b5169e5c2962371a14166477bff661bd 100644 (file)
  * to keep track of and update phis along the way which gets tricky and
  * doesn't add much value over converting to regs.
  *
- * The loop may have a continue instruction at the end of the loop which does
+ * The loop may have a jump instruction at the end of the loop which does
  * nothing.  Once we're out of SSA, we can safely delete it so we don't have
  * to deal with it later.
  */
 static void
 loop_prepare_for_unroll(nir_loop *loop)
 {
+   nir_rematerialize_derefs_in_use_blocks_impl(
+      nir_cf_node_get_function(&loop->cf_node));
+
    nir_convert_loop_to_lcssa(loop);
 
    /* Lower phis at the top level of the loop body */
@@ -64,10 +67,9 @@ loop_prepare_for_unroll(nir_loop *loop)
 
    nir_lower_phis_to_regs_block(block_after_loop);
 
-   /* Remove continue if its the last instruction in the loop */
+   /* Remove jump if it's the last instruction in the loop */
    nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
    if (last_instr && last_instr->type == nir_instr_type_jump) {
-      assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
       nir_instr_remove(last_instr);
    }
 }
@@ -163,36 +165,23 @@ simple_unroll(nir_loop *loop)
    nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
                   nir_after_block(nir_loop_last_block(loop)));
 
-   struct hash_table *remap_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
-
-   /* Clone the loop header */
-   nir_cf_list cloned_header;
-   nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
-                     remap_table);
-
-   /* Insert cloned loop header before the loop */
-   nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
-
-   /* Temp list to store the cloned loop body as we unroll */
-   nir_cf_list unrolled_lp_body;
-
-   /* Clone loop header and append to the loop body */
-   for (unsigned i = 0; i < loop->info->trip_count; i++) {
-      /* Clone loop body */
-      nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
-                        remap_table);
+   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
 
-      /* Insert unrolled loop body before the loop */
-      nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
+   /* Clone the loop header and insert before the loop */
+   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+                                  nir_before_cf_node(&loop->cf_node),
+                                  remap_table);
 
-      /* Clone loop header */
-      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
-                        remap_table);
+   for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
+      /* Clone loop body and insert before the loop */
+      nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
+                                     nir_before_cf_node(&loop->cf_node),
+                                     remap_table);
 
-      /* Insert loop header after loop body */
-      nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+      /* Clone loop header and insert after loop body */
+      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+                                     nir_before_cf_node(&loop->cf_node),
+                                     remap_table);
    }
 
    /* Remove the break from the loop terminator and add instructions from
@@ -205,11 +194,9 @@ simple_unroll(nir_loop *loop)
                   nir_after_block(limiting_term->break_block));
 
    /* Clone so things get properly remapped */
-   nir_cf_list cloned_break_list;
-   nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
-                     remap_table);
-
-   nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
+   nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
+                                  nir_before_cf_node(&loop->cf_node),
+                                  remap_table);
 
    /* Remove the loop */
    nir_cf_node_remove(&loop->cf_node);
@@ -247,6 +234,65 @@ get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
    }
 }
 
+static nir_cf_node *
+complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
+                         nir_cf_list *lp_header, nir_cf_list *lp_body,
+                         struct hash_table *remap_table,
+                         unsigned num_times_to_clone)
+{
+   /* In the terminator that we have no trip count for move everything after
+    * the terminator into the continue from branch.
+    */
+   nir_cf_list loop_end;
+   nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
+                  nir_after_block(nir_loop_last_block(loop)));
+   move_cf_list_into_loop_term(&loop_end, unlimit_term);
+
+   /* Pluck out the loop body. */
+   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+                  nir_after_block(nir_loop_last_block(loop)));
+
+   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
+    */
+   nir_cf_node *unroll_loc = &loop->cf_node;
+
+   /* Temp list to store the cloned loop as we unroll */
+   nir_cf_list unrolled_lp_body;
+
+   for (unsigned i = 0; i < num_times_to_clone; i++) {
+
+      nir_cursor cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      /* Clone loop header and insert in if branch */
+      nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
+                                     cursor, remap_table);
+
+      cursor =
+         get_complex_unroll_insert_location(unroll_loc,
+                                            unlimit_term->continue_from_then);
+
+      /* Clone loop body */
+      nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
+                        remap_table);
+
+      unroll_loc = exec_node_data(nir_cf_node,
+                                  exec_list_get_tail(&unrolled_lp_body.list),
+                                  node);
+      assert(unroll_loc->type == nir_cf_node_block &&
+             exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+
+      /* Get the unrolled if node */
+      unroll_loc = nir_cf_node_prev(unroll_loc);
+
+      /* Insert unrolled loop body */
+      nir_cf_reinsert(&unrolled_lp_body, cursor);
+   }
+
+   return unroll_loc;
+}
+
 /**
  * Unroll a loop with two exists when the trip count of one of the exits is
  * unknown.  If continue_from_then is true, the loop is repeated only when the
@@ -338,7 +384,7 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
        * trip count == 1 we execute the code above the break twice and the
        * code below it once so we need clone things twice and so on.
        */
-      num_times_to_clone = loop->info->trip_count + 1;
+      num_times_to_clone = loop->info->max_trip_count + 1;
    } else {
       /* Pluck out the loop header */
       nir_cf_extract(&lp_header, nir_before_block(header_blk),
@@ -366,126 +412,430 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
 
       nir_cf_node_remove(&limiting_term->nif->cf_node);
 
-      num_times_to_clone = loop->info->trip_count;
+      num_times_to_clone = loop->info->max_trip_count;
    }
 
-   /* In the terminator that we have no trip count for move everything after
-    * the terminator into the continue from branch.
-    */
-   nir_cf_list loop_end;
-   nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
-                  nir_after_block(nir_loop_last_block(loop)));
-   move_cf_list_into_loop_term(&loop_end, unlimit_term);
-
-   /* Pluck out the loop body. */
-   nir_cf_list loop_body;
-   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
-                  nir_after_block(nir_loop_last_block(loop)));
+   struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
 
-   struct hash_table *remap_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
+   nir_cf_list lp_body;
+   nir_cf_node *unroll_loc =
+      complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
+                               remap_table, num_times_to_clone);
 
-   /* Set unroll_loc to the loop as we will insert the unrolled loop before it
-    */
-   nir_cf_node *unroll_loc = &loop->cf_node;
-
-   /* Temp lists to store the cloned loop as we unroll */
-   nir_cf_list unrolled_lp_body;
-   nir_cf_list cloned_header;
-
-   for (unsigned i = 0; i < num_times_to_clone; i++) {
-      /* Clone loop header */
-      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
-                        remap_table);
+   if (!limiting_term_second) {
+      assert(unroll_loc->type == nir_cf_node_if);
 
       nir_cursor cursor =
          get_complex_unroll_insert_location(unroll_loc,
                                             unlimit_term->continue_from_then);
 
-      /* Insert cloned loop header */
-      nir_cf_reinsert(&cloned_header, cursor);
+      /* Clone loop header and insert in if branch */
+      nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+                                     cursor, remap_table);
 
       cursor =
          get_complex_unroll_insert_location(unroll_loc,
                                             unlimit_term->continue_from_then);
 
-      /* Clone loop body */
-      nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
-                        remap_table);
+      /* Clone so things get properly remapped, and insert break block from
+       * the limiting terminator.
+       */
+      nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
+                                     cursor, remap_table);
 
-      unroll_loc = exec_node_data(nir_cf_node,
-                                  exec_list_get_tail(&unrolled_lp_body.list),
-                                  node);
-      assert(unroll_loc->type == nir_cf_node_block &&
-             exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+      nir_cf_delete(&limit_break_list);
+   }
 
-      /* Get the unrolled if node */
-      unroll_loc = nir_cf_node_prev(unroll_loc);
+   /* The loop has been unrolled so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
 
-      /* Insert unrolled loop body */
-      nir_cf_reinsert(&unrolled_lp_body, cursor);
-   }
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&lp_body);
 
-   if (!limiting_term_second) {
-      assert(unroll_loc->type == nir_cf_node_if);
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
 
-      nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
-                        remap_table);
+/**
+ * Unroll loops where we only have a single terminator but the exact trip
+ * count is unknown. For example:
+ *
+ *    for (int i = 0; i < imin(x, 4); i++)
+ *       ...
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+   assert(list_length(&loop->info->loop_terminator_list) == 1);
+   assert(loop->info->limiting_terminator);
+   assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+                                 loop->info->limiting_terminator->break_block));
 
-      nir_cursor cursor =
-         get_complex_unroll_insert_location(unroll_loc,
-                                            unlimit_term->continue_from_then);
+   nir_loop_terminator *terminator = loop->info->limiting_terminator;
 
-      /* Insert cloned loop header */
-      nir_cf_reinsert(&cloned_header, cursor);
+   loop_prepare_for_unroll(loop);
 
-      /* Clone so things get properly remapped, and insert break block from
-       * the limiting terminator.
-       */
-      nir_cf_list cloned_break_blk;
-      nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
-                        loop->cf_node.parent, remap_table);
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&terminator->nif->cf_node));
 
-      cursor =
-         get_complex_unroll_insert_location(unroll_loc,
-                                            unlimit_term->continue_from_then);
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
 
-      nir_cf_reinsert(&cloned_break_blk, cursor);
-      nir_cf_delete(&limit_break_list);
+   /* We need to clone the loop one extra time in order to clone the lcssa
+    * vars for the last iteration (they are inside the following if's break
+    * branch). We leave other passes to clean up this redundant if.
+    */
+   unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+   nir_cf_list lp_body;
+   UNUSED nir_cf_node *unroll_loc =
+      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+                               remap_table, num_times_to_clone);
+
+   /* Delete the original loop header and body */
+   nir_cf_delete(&lp_header);
+   nir_cf_delete(&lp_body);
+
+   /* The original loop has been replaced so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
+
+   _mesa_hash_table_destroy(remap_table, NULL);
+}
+
+/* Unrolls the classic wrapper loops, e.g.
+ *
+ *    do {
+ *        // ...
+ *    } while (false)
+ */
+static bool
+wrapper_unroll(nir_loop *loop)
+{
+   if (!list_is_empty(&loop->info->loop_terminator_list)) {
+
+      /* Unrolling a loop with a large number of exits can result in a
+       * large increase in register pressure. For now we just skip
+       * unrolling if we have more than 3 exits (not including the break
+       * at the end of the loop).
+       *
+       * TODO: Most loops that fit this pattern are simply switch
+       * statements that are converted to a loop to take advantage of
+       * exiting jump instruction handling. In this case we could make
+       * use of a binary search pattern like we do in
+       * nir_lower_indirect_derefs(), this should allow us to unroll the
+       * loops in an optimal way and should also avoid some of the
+       * register pressure that comes from simply nesting the
+       * terminators one after the other.
+       */
+      if (list_length(&loop->info->loop_terminator_list) > 3)
+         return false;
+
+      loop_prepare_for_unroll(loop);
+
+      nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
+      list_for_each_entry(nir_loop_terminator, terminator,
+                          &loop->info->loop_terminator_list,
+                          loop_terminator_link) {
+
+         /* Remove break from the terminator */
+         nir_instr *break_instr =
+            nir_block_last_instr(terminator->break_block);
+         nir_instr_remove(break_instr);
+
+         /* Pluck out the loop body. */
+         nir_cf_list loop_body;
+         nir_cf_extract(&loop_body,
+                        nir_after_cf_node(&terminator->nif->cf_node),
+                        loop_end);
+
+         /* Reinsert loop body into continue from block */
+         nir_cf_reinsert(&loop_body,
+                         nir_after_block(terminator->continue_from_block));
+
+         loop_end = terminator->continue_from_then ?
+           nir_after_block(nir_if_last_then_block(terminator->nif)) :
+           nir_after_block(nir_if_last_else_block(terminator->nif));
+      }
+   } else {
+      loop_prepare_for_unroll(loop);
    }
 
+   /* Pluck out the loop body. */
+   nir_cf_list loop_body;
+   nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
+                  nir_after_block(nir_loop_last_block(loop)));
+
+   /* Reinsert loop body after the loop */
+   nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
+
    /* The loop has been unrolled so remove it. */
    nir_cf_node_remove(&loop->cf_node);
 
+   return true;
+}
+
+static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+                        unsigned trip_count)
+{
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+      if (d->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+      nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+      if (!nir_srcs_equal(d->arr.index, src))
+         continue;
+
+      nir_deref_instr *parent = nir_deref_instr_parent(d);
+      assert(glsl_type_is_array(parent->type) ||
+             glsl_type_is_matrix(parent->type));
+
+      /* We have already unrolled the loop and the new one will be embedded in
+       * the innermost continue branch. So unless the array is greater than
+       * the trip count any iteration over the loop will be an out of bounds
+       * access of the array.
+       */
+      return glsl_get_length(parent->type) <= trip_count;
+   }
+
+   return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+                                   nir_loop_terminator *term,
+                                   nir_cf_list *lp_header,
+                                   nir_cf_list *lp_body,
+                                   unsigned trip_count)
+{
+   if (!loop->info->guessed_trip_count)
+      return;
+
+   /* Temporarily recreate the original loop so we can alter it */
+   nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+   nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
+
+   nir_foreach_block_in_cf_node(block, &loop->cf_node) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         /* Check for arrays variably-indexed by a loop induction variable.
+          * If this access is out of bounds remove the instruction or replace
+          * its use with an undefined instruction.
+          * If the loop is no longer useful we leave it for the appropriate
+          * pass to clean it up for us.
+          */
+         if (intrin->intrinsic == nir_intrinsic_load_deref ||
+             intrin->intrinsic == nir_intrinsic_store_deref ||
+             intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+            if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
+                                        trip_count)) {
+               if (intrin->intrinsic == nir_intrinsic_load_deref) {
+                  nir_ssa_def *undef =
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                   intrin->dest.ssa.bit_size);
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                           nir_src_for_ssa(undef));
+               } else {
+                  nir_instr_remove(instr);
+                  continue;
+               }
+            }
+
+            if (intrin->intrinsic == nir_intrinsic_copy_deref &&
+                is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
+                                        trip_count)) {
+               nir_instr_remove(instr);
+            }
+         }
+      }
+   }
+
+   /* Now that we are done extract the loop header and body again */
+   nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&term->nif->cf_node));
+   nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+                  nir_after_block(nir_loop_last_block(loop)));
+}
+
+/* Partially unrolls loops that don't have a known trip count.
+ */
+static void
+partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
+{
+   assert(list_length(&loop->info->loop_terminator_list) == 1);
+
+   nir_loop_terminator *terminator =
+      list_first_entry(&loop->info->loop_terminator_list,
+                        nir_loop_terminator, loop_terminator_link);
+
+   assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
+
+   loop_prepare_for_unroll(loop);
+
+   /* Pluck out the loop header */
+   nir_cf_list lp_header;
+   nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+                  nir_before_cf_node(&terminator->nif->cf_node));
+
+   struct hash_table *remap_table =
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   nir_cf_list lp_body;
+   nir_cf_node *unroll_loc =
+      complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+                               remap_table, trip_count);
+
+   /* Attempt to remove out of bounds array access */
+   remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
+                                      &lp_body, trip_count);
+
+   nir_cursor cursor =
+      get_complex_unroll_insert_location(unroll_loc,
+                                         terminator->continue_from_then);
+
+   /* Reinsert the loop in the innermost nested continue branch of the unrolled
+    * loop.
+    */
+   nir_loop *new_loop = nir_loop_create(shader);
+   nir_cf_node_insert(cursor, &new_loop->cf_node);
+   new_loop->partially_unrolled = true;
+
+   /* Clone loop header and insert into new loop */
+   nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+                                  nir_after_cf_list(&new_loop->body),
+                                  remap_table);
+
+   /* Clone loop body and insert into new loop */
+   nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
+                                  nir_after_cf_list(&new_loop->body),
+                                  remap_table);
+
+   /* Insert break back into terminator */
+   nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
+   nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
+   if (terminator->continue_from_then) {
+      nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
+   } else {
+      nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
+   }
+
    /* Delete the original loop header and body */
    nir_cf_delete(&lp_header);
-   nir_cf_delete(&loop_body);
+   nir_cf_delete(&lp_body);
+
+   /* The original loop has been replaced so remove it. */
+   nir_cf_node_remove(&loop->cf_node);
 
    _mesa_hash_table_destroy(remap_table, NULL);
 }
 
+/*
+ * Returns true if we should unroll the loop, otherwise false.
+ */
 static bool
-is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
 {
+   if (loop->control == nir_loop_control_unroll)
+      return true;
+
+   if (loop->control == nir_loop_control_dont_unroll)
+      return false;
+
+   nir_loop_info *li = loop->info;
    unsigned max_iter = shader->options->max_unroll_iterations;
+   unsigned trip_count =
+      li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
 
-   if (li->trip_count > max_iter)
+   if (trip_count > max_iter)
       return false;
 
-   if (li->force_unroll)
+   if (li->force_unroll && !li->guessed_trip_count)
       return true;
 
    bool loop_not_too_large =
-      li->num_instructions * li->trip_count <= max_iter * LOOP_UNROLL_LIMIT;
+      li->instr_cost * trip_count <= max_iter * LOOP_UNROLL_LIMIT;
 
    return loop_not_too_large;
 }
 
 static bool
-process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
+process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
+              bool *unrolled_this_block);
+
+static bool
+process_loops_in_block(nir_shader *sh, struct exec_list *block,
+                       bool *has_nested_loop_out)
+{
+   /* We try to unroll as many loops in one pass as possible.
+    * E.g. we can safely unroll both loops in this block:
+    *
+    *    if (...) {
+    *       loop {...}
+    *    }
+    *
+    *    if (...) {
+    *       loop {...}
+    *    }
+    *
+    * Unrolling one loop doesn't affect the other one.
+    *
+    * On the other hand, for a block with:
+    *
+    *    loop {...}
+    *    ...
+    *    loop {...}
+    *
+    * It is unsafe to unroll both loops in one pass without taking
+    * complicated precautions, since the structure of the block would
+    * change after unrolling the first loop. So in such a case we leave
+    * the second loop for the next iteration of unrolling to handle.
+    */
+
+   bool progress = false;
+   bool unrolled_this_block = false;
+
+   foreach_list_typed(nir_cf_node, nested_node, node, block) {
+      if (process_loops(sh, nested_node,
+                        has_nested_loop_out, &unrolled_this_block)) {
+         progress = true;
+
+         /* If the current node was unrolled we cannot safely continue
+          * our iteration, since we no longer know the next node
+          * and it's hard to guarantee that we won't end up unrolling
+          * an inner loop of the just-unrolled one, if such exists.
+          */
+         if (unrolled_this_block) {
+            break;
+         }
+      }
+   }
+
+   return progress;
+}
+
+static bool
+process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
+              bool *unrolled_this_block)
 {
    bool progress = false;
+   bool has_nested_loop = false;
    nir_loop *loop;
 
    switch (cf_node->type) {
@@ -493,41 +843,73 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
       return progress;
    case nir_cf_node_if: {
       nir_if *if_stmt = nir_cf_node_as_if(cf_node);
-      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
-         progress |= process_loops(sh, nested_node, innermost_loop);
-      foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
-         progress |= process_loops(sh, nested_node, innermost_loop);
+      progress |= process_loops_in_block(sh, &if_stmt->then_list,
+                                         has_nested_loop_out);
+      progress |= process_loops_in_block(sh, &if_stmt->else_list,
+                                         has_nested_loop_out);
       return progress;
    }
    case nir_cf_node_loop: {
       loop = nir_cf_node_as_loop(cf_node);
-      foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
-         progress |= process_loops(sh, nested_node, innermost_loop);
+      progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
+
       break;
    }
    default:
       unreachable("unknown cf node type");
    }
 
-   if (*innermost_loop) {
-      /* Don't attempt to unroll outer loops or a second inner loop in
-       * this pass wait until the next pass as we have altered the cf.
+   const bool unrolled_child_block = progress;
+
+   /* Don't attempt to unroll a second inner loop in this pass, wait until the
+    * next pass as we have altered the cf.
+    */
+   if (!progress && loop->control != nir_loop_control_dont_unroll) {
+
+      /* Check for the classic
+       *
+       *    do {
+       *        // ...
+       *    } while (false)
+       *
+       * that is used to wrap multi-line macros. GLSL IR also wraps switch
+       * statements in a loop like this.
        */
-      *innermost_loop = false;
+      if (loop->info->limiting_terminator == NULL &&
+          !loop->info->complex_loop) {
+
+         nir_block *last_loop_blk = nir_loop_last_block(loop);
+         if (nir_block_ends_in_break(last_loop_blk)) {
+            progress = wrapper_unroll(loop);
+            goto exit;
+         }
+
+         /* If we were able to guess the loop iteration based on array access
+          * then do a partial unroll.
+          */
+         unsigned num_lt = list_length(&loop->info->loop_terminator_list);
+         if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
+             loop->info->guessed_trip_count &&
+             check_unrolling_restrictions(sh, loop)) {
+            partial_unroll(sh, loop, loop->info->guessed_trip_count);
+            progress = true;
+         }
+      }
 
-      if (loop->info->limiting_terminator == NULL)
-         return progress;
+      if (has_nested_loop || !loop->info->limiting_terminator)
+         goto exit;
 
-      if (!is_loop_small_enough_to_unroll(sh, loop->info))
-         return progress;
+      if (!check_unrolling_restrictions(sh, loop))
+         goto exit;
 
-      if (loop->info->is_trip_count_known) {
+      if (loop->info->exact_trip_count_known) {
          simple_unroll(loop);
          progress = true;
       } else {
          /* Attempt to unroll loops with two terminators. */
          unsigned num_lt = list_length(&loop->info->loop_terminator_list);
-         if (num_lt == 2) {
+         if (num_lt == 2 &&
+             !loop->info->limiting_terminator->exact_trip_count_unknown) {
             bool limiting_term_second = true;
             nir_loop_terminator *terminator =
                list_first_entry(&loop->info->loop_terminator_list,
@@ -545,16 +927,27 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
              * limiting terminator just do a simple unroll as the second
              * terminator can never be reached.
              */
-            if (loop->info->trip_count == 0 && !limiting_term_second) {
+            if (loop->info->max_trip_count == 0 && !limiting_term_second) {
                simple_unroll(loop);
             } else {
                complex_unroll(loop, terminator, limiting_term_second);
             }
             progress = true;
          }
+
+         if (num_lt == 1) {
+            assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+            complex_unroll_single_terminator(loop);
+            progress = true;
+         }
       }
    }
 
+exit:
+   *has_nested_loop_out = true;
+   if (progress && !unrolled_child_block)
+      *unrolled_this_block = true;
+
    return progress;
 }
 
@@ -566,14 +959,16 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl,
    nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
    nir_metadata_require(impl, nir_metadata_block_index);
 
-   foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
-      bool innermost_loop = true;
-      progress |= process_loops(impl->function->shader, node,
-                                &innermost_loop);
-   }
+   bool has_nested_loop = false;
+   progress |= process_loops_in_block(impl->function->shader, &impl->body,
+                                      &has_nested_loop);
 
-   if (progress)
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_none);
       nir_lower_regs_to_ssa_impl(impl);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
+   }
 
    return progress;
 }