glsl ubo/ssbo: Use enum to track current buffer access type
[mesa.git] / src / glsl / loop_unroll.cpp
index e1009169370483c11681572f8b828ccad9081084..b9ea35077828335b90ce5f9dc26ca34b44a361fc 100644 (file)
 #include "loop_analysis.h"
 #include "ir_hierarchical_visitor.h"
 
+#include "main/mtypes.h"
+
 namespace {
 
 class loop_unroll_visitor : public ir_hierarchical_visitor {
 public:
-   loop_unroll_visitor(loop_state *state, unsigned max_iterations)
+   loop_unroll_visitor(loop_state *state,
+                       const struct gl_shader_compiler_options *options)
    {
       this->state = state;
       this->progress = false;
-      this->max_iterations = max_iterations;
+      this->options = options;
    }
 
    virtual ir_visitor_status visit_leave(ir_loop *ir);
@@ -45,7 +48,7 @@ public:
    loop_state *state;
 
    bool progress;
-   unsigned max_iterations;
+   const struct gl_shader_compiler_options *options;
 };
 
 } /* anonymous namespace */
@@ -60,33 +63,109 @@ is_break(ir_instruction *ir)
 class loop_unroll_count : public ir_hierarchical_visitor {
 public:
    int nodes;
-   bool fail;
-
-   loop_unroll_count(exec_list *list)
+   bool unsupported_variable_indexing;
+   bool array_indexed_by_induction_var_with_exact_iterations;
+   /* If there are nested loops, the node count will be inaccurate. */
+   bool nested_loop;
+
+   loop_unroll_count(exec_list *list, loop_variable_state *ls,
+                     const struct gl_shader_compiler_options *options)
+      : ls(ls), options(options)
    {
       nodes = 0;
-      fail = false;
+      nested_loop = false;
+      unsupported_variable_indexing = false;
+      array_indexed_by_induction_var_with_exact_iterations = false;
 
       run(list);
    }
 
-   virtual ir_visitor_status visit_enter(ir_assignment *ir)
+   virtual ir_visitor_status visit_enter(ir_assignment *)
    {
       nodes++;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_expression *ir)
+   virtual ir_visitor_status visit_enter(ir_expression *)
    {
       nodes++;
       return visit_continue;
    }
 
-   virtual ir_visitor_status visit_enter(ir_loop *ir)
+   virtual ir_visitor_status visit_enter(ir_loop *)
    {
-      fail = true;
+      nested_loop = true;
       return visit_continue;
    }
+
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      /* Force unroll in case of dynamic indexing with sampler arrays
+       * when EmitNoIndirectSampler is set.
+       */
+      if (options->EmitNoIndirectSampler) {
+         if ((ir->array->type->is_array() &&
+              ir->array->type->contains_sampler()) &&
+             !ir->array_index->constant_expression_value()) {
+            unsupported_variable_indexing = true;
+            return visit_continue;
+         }
+      }
+
+      /* Check for arrays variably-indexed by a loop induction variable.
+       * Unrolling the loop may convert that access into constant-indexing.
+       *
+       * Many drivers don't support particular kinds of variable indexing,
+       * and have to resort to using lower_variable_index_to_cond_assign to
+       * handle it.  This results in huge amounts of horrible code, so we'd
+       * like to avoid that if possible.  Here, we just note that it will
+       * happen.
+       */
+      if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
+          !ir->array_index->as_constant()) {
+         ir_variable *array = ir->array->variable_referenced();
+         loop_variable *lv = ls->get(ir->array_index->variable_referenced());
+         if (array && lv && lv->is_induction_var()) {
+            /* If an array is indexed by a loop induction variable, and the
+             * array size is exactly the number of loop iterations, this is
+             * probably a simple for-loop trying to access each element in
+             * turn; the application may expect it to be unrolled.
+             */
+            if (int(array->type->length) == ls->limiting_terminator->iterations)
+               array_indexed_by_induction_var_with_exact_iterations = true;
+
+            switch (array->data.mode) {
+            case ir_var_auto:
+            case ir_var_temporary:
+            case ir_var_const_in:
+            case ir_var_function_in:
+            case ir_var_function_out:
+            case ir_var_function_inout:
+               if (options->EmitNoIndirectTemp)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_uniform:
+            case ir_var_shader_storage:
+               if (options->EmitNoIndirectUniform)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_shader_in:
+               if (options->EmitNoIndirectInput)
+                  unsupported_variable_indexing = true;
+               break;
+            case ir_var_shader_out:
+               if (options->EmitNoIndirectOutput)
+                  unsupported_variable_indexing = true;
+               break;
+            }
+         }
+      }
+      return visit_continue;
+   }
+
+private:
+   loop_variable_state *ls;
+   const struct gl_shader_compiler_options *options;
 };
 
 
@@ -236,30 +315,43 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
       return visit_continue;
    }
 
-   iterations = ls->max_iterations;
-
    /* Don't try to unroll loops where the number of iterations is not known
     * at compile-time.
     */
-   if (iterations < 0)
+   if (ls->limiting_terminator == NULL)
       return visit_continue;
 
+   iterations = ls->limiting_terminator->iterations;
+
+   const int max_iterations = options->MaxUnrollIterations;
+
    /* Don't try to unroll loops that have zillions of iterations either.
     */
-   if (iterations > (int) max_iterations)
+   if (iterations > max_iterations)
       return visit_continue;
 
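    /* Don't try to unroll nested loops and loops with a huge body, unless
     * the body uses variable indexing the driver cannot handle or indexes
     * an array by the induction variable over exactly its length.
     */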
-   loop_unroll_count count(&ir->body_instructions);
+   loop_unroll_count count(&ir->body_instructions, ls, options);
+
+   bool loop_too_large =
+      count.nested_loop || count.nodes * iterations > max_iterations * 5;
 
-   if (count.fail || count.nodes * iterations > (int)max_iterations * 5)
+   if (loop_too_large && !count.unsupported_variable_indexing &&
+       !count.array_indexed_by_induction_var_with_exact_iterations)
       return visit_continue;
 
-   if (ls->num_loop_jumps > 1)
+   /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
+    * We'll be removing the limiting terminator before we unroll.
+    */
+   assert(ls->num_loop_jumps > 0);
+   unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
+
+   if (predicted_num_loop_jumps > 1)
       return visit_continue;
 
-   if (ls->num_loop_jumps == 0) {
+   if (predicted_num_loop_jumps == 0) {
+      ls->limiting_terminator->ir->remove();
       simple_unroll(ir, iterations);
       return visit_continue;
    }
@@ -274,13 +366,18 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
        */
       last_ir->remove();
 
+      ls->limiting_terminator->ir->remove();
       simple_unroll(ir, 1);
       return visit_continue;
    }
 
-   foreach_list(node, &ir->body_instructions) {
-      /* recognize loops in the form produced by ir_lower_jumps */
-      ir_instruction *cur_ir = (ir_instruction *) node;
+   /* recognize loops in the form produced by ir_lower_jumps */
+   foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
+      /* Skip the limiting terminator, since it will go away when we
+       * unroll.
+       */
+      if (cur_ir == ls->limiting_terminator->ir)
+         continue;
 
       ir_if *ir_if = cur_ir->as_if();
       if (ir_if != NULL) {
@@ -296,6 +393,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
             (ir_instruction *) ir_if->then_instructions.get_tail();
 
          if (is_break(ir_if_last)) {
+            ls->limiting_terminator->ir->remove();
             splice_post_if_instructions(ir_if, &ir_if->else_instructions);
             ir_if_last->remove();
             complex_unroll(ir, iterations, false);
@@ -305,6 +403,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
                (ir_instruction *) ir_if->else_instructions.get_tail();
 
             if (is_break(ir_if_last)) {
+               ls->limiting_terminator->ir->remove();
                splice_post_if_instructions(ir_if, &ir_if->then_instructions);
                ir_if_last->remove();
                complex_unroll(ir, iterations, true);
@@ -322,9 +421,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
 
 
 bool
-unroll_loops(exec_list *instructions, loop_state *ls, unsigned max_iterations)
+unroll_loops(exec_list *instructions, loop_state *ls,
+             const struct gl_shader_compiler_options *options)
 {
-   loop_unroll_visitor v(ls, max_iterations);
+   loop_unroll_visitor v(ls, options);
 
    v.run(instructions);