glsl/lower_if: conditionally lower if-branches based on their size
author: Marek Olšák <marek.olsak@amd.com>
Sun, 3 Jul 2016 15:11:07 +0000 (17:11 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 15 Nov 2016 19:23:39 +0000 (20:23 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/compiler/glsl/ir_optimization.h
src/compiler/glsl/lower_if_to_cond_assign.cpp

index e6e8318a3aa5b8d4d2a002306ec96eb9d9446973..0d6c4e6a66a216968ed98367a066bbcb9d4d6450 100644 (file)
@@ -109,7 +109,7 @@ bool do_if_simplification(exec_list *instructions);
 bool opt_flatten_nested_if_blocks(exec_list *instructions);
 bool do_discard_simplification(exec_list *instructions);
 bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
-                             unsigned max_depth = 0);
+                             unsigned max_depth = 0, unsigned min_branch_cost = 0);
 bool do_mat_op_to_vec(exec_list *instructions);
 bool do_minmax_prune(exec_list *instructions);
 bool do_noop_swizzle(exec_list *instructions);
index e8db7aa9b7f5a28b24fbf4ac418611d5ead6394e..ae048be0d27b58e92f83c7c81e083e1118b57dcd 100644 (file)
 /**
  * \file lower_if_to_cond_assign.cpp
  *
- * This attempts to flatten if-statements to conditional assignments for
- * GPUs with limited or no flow control support.
+ * This flattens if-statements to conditional assignments if:
+ *
+ * - the GPU has limited or no flow control support
+ *   (controlled by max_depth)
+ *
+ * - small conditional branches are more expensive than conditional assignments
+ *   (controlled by min_branch_cost, the minimum cost a branch must have to
+ *    be preserved)
  *
  * It can't handle other control flow being inside of its block, such
  * as calls or loops.  Hopefully loop unrolling and inlining will take
 #include "ir.h"
 #include "util/set.h"
 #include "util/hash_table.h" /* Needed for the hashing functions */
+#include "main/macros.h" /* for MAX2 */
 
 namespace {
 
 class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
 public:
    ir_if_to_cond_assign_visitor(gl_shader_stage stage,
-                                unsigned max_depth)
+                                unsigned max_depth,
+                                unsigned min_branch_cost)
    {
       this->progress = false;
       this->stage = stage;
       this->max_depth = max_depth;
+      this->min_branch_cost = min_branch_cost;
       this->depth = 0;
 
       this->condition_variables =
@@ -76,8 +85,13 @@ public:
    ir_visitor_status visit_leave(ir_if *);
 
    bool found_unsupported_op;
+   bool found_expensive_op;
+   bool is_then;
    bool progress;
    gl_shader_stage stage;
+   unsigned then_cost;
+   unsigned else_cost;
+   unsigned min_branch_cost;
    unsigned max_depth;
    unsigned depth;
 
@@ -88,12 +102,12 @@ public:
 
 bool
 lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
-                        unsigned max_depth)
+                        unsigned max_depth, unsigned min_branch_cost)
 {
    if (max_depth == UINT_MAX)
       return false;
 
-   ir_if_to_cond_assign_visitor v(stage, max_depth);
+   ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);
 
    visit_list_elements(&v, instructions);
 
@@ -129,6 +143,20 @@ check_ir_node(ir_instruction *ir, void *data)
       break;
    }
 
+   /* SSBO, images, atomic counters are handled by ir_type_call */
+   case ir_type_texture:
+      v->found_expensive_op = true;
+      break;
+
+   case ir_type_expression:
+   case ir_type_dereference_array:
+   case ir_type_dereference_record:
+      if (v->is_then)
+         v->then_cost++;
+      else
+         v->else_cost++;
+      break;
+
    default:
       break;
    }
@@ -193,24 +221,39 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
 ir_visitor_status
 ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
 {
+   bool must_lower = this->depth-- > this->max_depth;
+
    /* Only flatten when beyond the GPU's maximum supported nesting depth. */
-   if (this->depth-- <= this->max_depth)
+   if (!must_lower && this->min_branch_cost == 0)
       return visit_continue;
 
    this->found_unsupported_op = false;
+   this->found_expensive_op = false;
+   this->then_cost = 0;
+   this->else_cost = 0;
 
    ir_assignment *assign;
 
    /* Check that both blocks don't contain anything we can't support. */
+   this->is_then = true;
    foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
       visit_tree(then_ir, check_ir_node, this);
    }
+
+   this->is_then = false;
    foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
       visit_tree(else_ir, check_ir_node, this);
    }
+
    if (this->found_unsupported_op)
       return visit_continue; /* can't handle inner unsupported opcodes */
 
+   /* Skip if the branch cost is high enough or if there's an expensive op. */
+   if (!must_lower &&
+       (this->found_expensive_op ||
+        MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
+      return visit_continue;
+
    void *mem_ctx = ralloc_parent(ir);
 
    /* Store the condition to a variable.  Move all of the instructions from