Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / glsl / loop_unroll.cpp
index 80f92171590966aacbe38aaa18a41fc9b4ee4680..b9ea35077828335b90ce5f9dc26ca34b44a361fc 100644 (file)
 #include "loop_analysis.h"
 #include "ir_hierarchical_visitor.h"
 
+#include "main/mtypes.h"
+
+namespace {
+
 class loop_unroll_visitor : public ir_hierarchical_visitor {
 public:
-   loop_unroll_visitor(loop_state *state, unsigned max_iterations)
+   loop_unroll_visitor(loop_state *state,
+                       const struct gl_shader_compiler_options *options)
    {
       this->state = state;
       this->progress = false;
-      this->max_iterations = max_iterations;
+      this->options = options;
    }
 
    virtual ir_visitor_status visit_leave(ir_loop *ir);
+   void simple_unroll(ir_loop *ir, int iterations);
+   void complex_unroll(ir_loop *ir, int iterations,
+                       bool continue_from_then_branch);
+   void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
 
    loop_state *state;
 
    bool progress;
-   unsigned max_iterations;
+   const struct gl_shader_compiler_options *options;
+};
+
+} /* anonymous namespace */
+
+static bool
+is_break(ir_instruction *ir)
+{
+   return ir != NULL && ir->ir_type == ir_type_loop_jump
+                    && ((ir_loop_jump *) ir)->is_break();
+}
+
+class loop_unroll_count : public ir_hierarchical_visitor {
+public:
+   int nodes;
+   bool unsupported_variable_indexing;
+   bool array_indexed_by_induction_var_with_exact_iterations;
+   /* If there are nested loops, the node count will be inaccurate. */
+   bool nested_loop;
+
+   loop_unroll_count(exec_list *list, loop_variable_state *ls,
+                     const struct gl_shader_compiler_options *options)
+      : ls(ls), options(options)
+   {
+      nodes = 0;
+      nested_loop = false;
+      unsupported_variable_indexing = false;
+      array_indexed_by_induction_var_with_exact_iterations = false;
+
+      run(list);
+   }
+
+   virtual ir_visitor_status visit_enter(ir_assignment *)
+   {
+      nodes++;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_expression *)
+   {
+      nodes++;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_loop *)
+   {
+      nested_loop = true;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      /* Force unroll in case of dynamic indexing with sampler arrays
+       * when EmitNoIndirectSampler is set.
+       */
+      if (options->EmitNoIndirectSampler) {
+         if ((ir->array->type->is_array() &&
+              ir->array->type->contains_sampler()) &&
+             !ir->array_index->constant_expression_value()) {
+            unsupported_variable_indexing = true;
+            return visit_continue;
+         }
+      }
+
+      /* Check for arrays variably-indexed by a loop induction variable.
+       * Unrolling the loop may convert that access into constant-indexing.
+       *
+       * Many drivers don't support particular kinds of variable indexing,
+       * and have to resort to using lower_variable_index_to_cond_assign to
+       * handle it.  This results in huge amounts of horrible code, so we'd
+       * like to avoid that if possible.  Here, we just note that it will
+       * happen.
+       */
+      if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
+          !ir->array_index->as_constant()) {
+         ir_variable *array = ir->array->variable_referenced();
+         loop_variable *lv = ls->get(ir->array_index->variable_referenced());
+         if (array && lv && lv->is_induction_var()) {
+            /* If an array is indexed by a loop induction variable, and the
+             * array size is exactly the number of loop iterations, this is
+             * probably a simple for-loop trying to access each element in
+             * turn; the application may expect it to be unrolled.
+             */
+            if (int(array->type->length) == ls->limiting_terminator->iterations)
+               array_indexed_by_induction_var_with_exact_iterations = true;
+
+            switch (array->data.mode) {
+            case ir_var_auto:
+            case ir_var_temporary:
+            case ir_var_const_in:
+            case ir_var_function_in:
+            case ir_var_function_out:
+            case ir_var_function_inout:
+               if (options->EmitNoIndirectTemp)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_uniform:
+            case ir_var_shader_storage:
+               if (options->EmitNoIndirectUniform)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_shader_in:
+               if (options->EmitNoIndirectInput)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_shader_out:
+               if (options->EmitNoIndirectOutput)
+                  unsupported_variable_indexing = true;
+               break;
+            }
+         }
+      }
+      return visit_continue;
+   }
+
+private:
+   loop_variable_state *ls;
+   const struct gl_shader_compiler_options *options;
 };
 
 
+/**
+ * Unroll a loop which does not contain any jumps.  For example, if the input
+ * is:
+ *
+ *     (loop (...) ...instrs...)
+ *
+ * And the iteration count is 3, the output will be:
+ *
+ *     ...instrs... ...instrs... ...instrs...
+ */
+void
+loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
+{
+   void *const mem_ctx = ralloc_parent(ir);
+
+   for (int i = 0; i < iterations; i++) {
+      exec_list copy_list;
+
+      copy_list.make_empty();
+      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
+
+      ir->insert_before(&copy_list);
+   }
+
+   /* The loop has been replaced by the unrolled copies.  Remove the original
+    * loop from the IR sequence.
+    */
+   ir->remove();
+
+   this->progress = true;
+}
+
+
+/**
+ * Unroll a loop whose last statement is an ir_if.  If \c
+ * continue_from_then_branch is true, the loop is repeated only when the
+ * "then" branch of the if is taken; otherwise it is repeated only when the
+ * "else" branch of the if is taken.
+ *
+ * For example, if the input is:
+ *
+ *     (loop (...)
+ *      ...body...
+ *      (if (cond)
+ *          (...then_instrs...)
+ *        (...else_instrs...)))
+ *
+ * And the iteration count is 3, and \c continue_from_then_branch is true,
+ * then the output will be:
+ *
+ *     ...body...
+ *     (if (cond)
+ *         (...then_instrs...
+ *          ...body...
+ *          (if (cond)
+ *              (...then_instrs...
+ *               ...body...
+ *               (if (cond)
+ *                   (...then_instrs...)
+ *                 (...else_instrs...)))
+ *            (...else_instrs...)))
+ *       (...else_instrs))
+ */
+void
+loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
+                                    bool continue_from_then_branch)
+{
+   void *const mem_ctx = ralloc_parent(ir);
+   ir_instruction *ir_to_replace = ir;
+
+   for (int i = 0; i < iterations; i++) {
+      exec_list copy_list;
+
+      copy_list.make_empty();
+      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
+
+      ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
+      assert(ir_if != NULL);
+
+      ir_to_replace->insert_before(&copy_list);
+      ir_to_replace->remove();
+
+      /* placeholder that will be removed in the next iteration */
+      ir_to_replace =
+         new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
+
+      exec_list *const list = (continue_from_then_branch)
+         ? &ir_if->then_instructions : &ir_if->else_instructions;
+
+      list->push_tail(ir_to_replace);
+   }
+
+   ir_to_replace->remove();
+
+   this->progress = true;
+}
+
+
+/**
+ * Move all of the instructions which follow \c ir_if to the end of
+ * \c splice_dest.
+ *
+ * For example, in the code snippet:
+ *
+ *     (if (cond)
+ *         (...then_instructions...
+ *          break)
+ *       (...else_instructions...))
+ *     ...post_if_instructions...
+ *
+ * If \c ir_if points to the "if" instruction, and \c splice_dest points to
+ * (...else_instructions...), the code snippet is transformed into:
+ *
+ *     (if (cond)
+ *         (...then_instructions...
+ *          break)
+ *       (...else_instructions...
+ *        ...post_if_instructions...))
+ */
+void
+loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
+                                                 exec_list *splice_dest)
+{
+   while (!ir_if->get_next()->is_tail_sentinel()) {
+      ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
+
+      move_ir->remove();
+      splice_dest->push_tail(move_ir);
+   }
+}
+
+
 ir_visitor_status
 loop_unroll_visitor::visit_leave(ir_loop *ir)
 {
    loop_variable_state *const ls = this->state->get(ir);
+   int iterations;
 
    /* If we've entered a loop that hasn't been analyzed, something really,
     * really bad has happened.
@@ -59,42 +318,113 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
    /* Don't try to unroll loops where the number of iterations is not known
     * at compile-time.
     */
-   if (ls->max_iterations < 0)
+   if (ls->limiting_terminator == NULL)
       return visit_continue;
 
+   iterations = ls->limiting_terminator->iterations;
+
+   const int max_iterations = options->MaxUnrollIterations;
+
    /* Don't try to unroll loops that have zillions of iterations either.
     */
-   if (ls->max_iterations > max_iterations)
+   if (iterations > max_iterations)
       return visit_continue;
 
-   if (ls->num_loop_jumps > 0)
+   /* Don't try to unroll nested loops and loops with a huge body.
+    */
+   loop_unroll_count count(&ir->body_instructions, ls, options);
+
+   bool loop_too_large =
+      count.nested_loop || count.nodes * iterations > max_iterations * 5;
+
+   if (loop_too_large && !count.unsupported_variable_indexing &&
+       !count.array_indexed_by_induction_var_with_exact_iterations)
       return visit_continue;
 
-   void *const mem_ctx = talloc_parent(ir);
+   /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
+    * We'll be removing the limiting terminator before we unroll.
+    */
+   assert(ls->num_loop_jumps > 0);
+   unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
 
-   for (int i = 0; i < ls->max_iterations; i++) {
-      exec_list copy_list;
+   if (predicted_num_loop_jumps > 1)
+      return visit_continue;
 
-      copy_list.make_empty();
-      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
+   if (predicted_num_loop_jumps == 0) {
+      ls->limiting_terminator->ir->remove();
+      simple_unroll(ir, iterations);
+      return visit_continue;
+   }
 
-      ir->insert_before(&copy_list);
+   ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
+   assert(last_ir != NULL);
+
+   if (is_break(last_ir)) {
+      /* If the only loop-jump is a break at the end of the loop, the loop
+       * will execute exactly once.  Remove the break and use the simple
+       * unroller with an iteration count of 1.
+       */
+      last_ir->remove();
+
+      ls->limiting_terminator->ir->remove();
+      simple_unroll(ir, 1);
+      return visit_continue;
    }
 
-   /* The loop has been replaced by the unrolled copies.  Remove the original
-    * loop from the IR sequence.
-    */
-   ir->remove();
+   /* recognize loops in the form produced by ir_lower_jumps */
+   foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
+      /* Skip the limiting terminator, since it will go away when we
+       * unroll.
+       */
+      if (cur_ir == ls->limiting_terminator->ir)
+         continue;
 
-   this->progress = true;
+      ir_if *ir_if = cur_ir->as_if();
+      if (ir_if != NULL) {
+         /* Determine which if-statement branch, if any, ends with a
+          * break.  The branch that did *not* have the break will get a
+          * temporary continue inserted in each iteration of the loop
+          * unroll.
+          *
+          * Note that since ls->num_loop_jumps is <= 1, it is impossible
+          * for both branches to end with a break.
+          */
+         ir_instruction *ir_if_last =
+            (ir_instruction *) ir_if->then_instructions.get_tail();
+
+         if (is_break(ir_if_last)) {
+            ls->limiting_terminator->ir->remove();
+            splice_post_if_instructions(ir_if, &ir_if->else_instructions);
+            ir_if_last->remove();
+            complex_unroll(ir, iterations, false);
+            return visit_continue;
+         } else {
+            ir_if_last =
+               (ir_instruction *) ir_if->else_instructions.get_tail();
+
+            if (is_break(ir_if_last)) {
+               ls->limiting_terminator->ir->remove();
+               splice_post_if_instructions(ir_if, &ir_if->then_instructions);
+               ir_if_last->remove();
+               complex_unroll(ir, iterations, true);
+               return visit_continue;
+            }
+         }
+      }
+   }
+
+   /* Did not find the break statement.  It must be in a complex if-nesting,
+    * so don't try to unroll.
+    */
    return visit_continue;
 }
 
 
 bool
-unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations)
+unroll_loops(exec_list *instructions, loop_state *ls,
+             const struct gl_shader_compiler_options *options)
 {
-   loop_unroll_visitor v(ls, max_iterations);
+   loop_unroll_visitor v(ls, options);
 
    v.run(instructions);